diff --git a/clang-tools-extra/clang-move/tool/ClangMove.cpp b/clang-tools-extra/clang-move/tool/ClangMove.cpp index 1560dcaad6779..655ea81ee37d4 100644 --- a/clang-tools-extra/clang-move/tool/ClangMove.cpp +++ b/clang-tools-extra/clang-move/tool/ClangMove.cpp @@ -199,7 +199,7 @@ int main(int argc, const char **argv) { for (auto I = Files.begin(), E = Files.end(); I != E; ++I) { OS << " {\n"; OS << " \"FilePath\": \"" << *I << "\",\n"; - const auto Entry = FileMgr.getFile(*I); + const auto Entry = FileMgr.getOptionalFileRef(*I); auto ID = SM.translateFile(*Entry); std::string Content; llvm::raw_string_ostream ContentStream(Content); diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp index 0b38b18208194..d77df50f8fea2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp @@ -107,7 +107,6 @@ static std::string getNameOfNamespace(const CXXRecordDecl *Decl) { std::string Ns; llvm::raw_string_ostream OStream(Ns); NsDecl->printQualifiedName(OStream); - OStream.flush(); return Ns.empty() ? "(global)" : Ns; } diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp index 6a003a347bada..108717e151b57 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp @@ -35,10 +35,20 @@ AST_MATCHER(Type, sugaredNullptrType) { /// to null within. /// Finding sequences of explicit casts is necessary so that an entire sequence /// can be replaced instead of just the inner-most implicit cast. +/// +/// TODO/NOTE: The second "anyOf" below discards matches on a substituted type, +/// since we don't know if that would _always_ be a pointer type for all other +/// specializations, unless the expression was "__null", in which case we assume +/// that all specializations are expected to be for pointer types. Ideally this +/// would check for the "NULL" macro instead, but that'd be harder to express. +/// In practice, "NULL" is often defined as "__null", and this is a useful +/// condition. StatementMatcher makeCastSequenceMatcher(llvm::ArrayRef<StringRef> NameList) { auto ImplicitCastToNull = implicitCastExpr( anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)), - unless(hasImplicitDestinationType(qualType(substTemplateTypeParmType()))), + anyOf(hasSourceExpression(gnuNullExpr()), + unless(hasImplicitDestinationType( + qualType(substTemplateTypeParmType())))), unless(hasSourceExpression(hasType(sugaredNullptrType()))), unless(hasImplicitDestinationType( qualType(matchers::matchesAnyListedTypeName(NameList))))); diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp index fda1e5fdf8d82..333fc10f17d7b 100644 --- a/clang-tools-extra/clangd/AST.cpp +++ b/clang-tools-extra/clangd/AST.cpp @@ -187,7 +187,7 @@ std::string printQualifiedName(const NamedDecl &ND) { // In clangd, context is usually available and paths are mostly noise. Policy.AnonymousTagLocations = false; ND.printQualifiedName(OS, Policy); - OS.flush(); assert(!StringRef(QName).starts_with("::")); return QName; } @@ -270,7 +269,6 @@ std::string printTemplateSpecializationArgs(const NamedDecl &ND) { // location information.
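The OS.flush() deletions running through this patch are safe because llvm::raw_string_ostream is unbuffered: it writes through to the backing std::string immediately, so the string is already complete when it is read. A minimal standalone sketch (not part of the patch) of the invariant the call sites rely on:

    #include "llvm/Support/raw_ostream.h"
    #include <cassert>
    #include <string>

    int main() {
      std::string Buf;
      llvm::raw_string_ostream OS(Buf);
      OS << "x = " << 42;
      // No OS.flush() needed: the stream has no internal buffer.
      assert(Buf == "x = 42");
    }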
printTemplateArgumentList(OS, Cls->getTemplateArgs().asArray(), Policy); } - OS.flush(); return TemplateArgs; } @@ -303,7 +301,6 @@ std::string printObjCMethod(const ObjCMethodDecl &Method) { OS << ", ..."; OS << ']'; - OS.flush(); return Name; } @@ -314,7 +311,6 @@ std::string printObjCContainer(const ObjCContainerDecl &C) { const ObjCInterfaceDecl *Class = Category->getClassInterface(); OS << getNameOrErrForObjCInterface(Class) << '(' << Category->getName() << ')'; - OS.flush(); return Name; } if (const ObjCCategoryImplDecl *CID = dyn_cast(&C)) { @@ -322,7 +318,6 @@ std::string printObjCContainer(const ObjCContainerDecl &C) { llvm::raw_string_ostream OS(Name); const ObjCInterfaceDecl *Class = CID->getClassInterface(); OS << getNameOrErrForObjCInterface(Class) << '(' << CID->getName() << ')'; - OS.flush(); return Name; } return C.getNameAsString(); diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp index 552dd36b6900b..a8214acc50558 100644 --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -319,7 +319,6 @@ std::string mainMessage(const Diag &D, const ClangdDiagnosticOptions &Opts) { OS << "\n\n"; printDiag(OS, Note); } - OS.flush(); return capitalize(std::move(Result)); } @@ -335,7 +334,6 @@ std::string noteMessage(const Diag &Main, const DiagBase &Note, OS << "\n\n"; printDiag(OS, Main); } - OS.flush(); return capitalize(std::move(Result)); } diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp index 55f16b7085a6f..cf2f8b62a2841 100644 --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -182,7 +182,6 @@ std::string getSymbolName(ASTContext &Ctx, const NamedDecl &ND) { OS << (Method->isInstanceMethod() ? '-' : '+'); Method->getSelector().print(OS); - OS.flush(); return Name; } return printName(Ctx, ND); diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index de103e011c708..298fa79e3fd0b 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -150,7 +150,6 @@ std::string printDefinition(const Decl *D, PrintingPolicy PP, std::string Definition; llvm::raw_string_ostream OS(Definition); D->print(OS, PP); - OS.flush(); return Definition; } @@ -179,7 +178,6 @@ HoverInfo::PrintedType printType(QualType QT, ASTContext &ASTCtx, OS << TT->getDecl()->getKindName() << " "; } QT.print(OS, PP); - OS.flush(); const Config &Cfg = Config::current(); if (!QT.isNull() && Cfg.Hover.ShowAKA) { @@ -229,7 +227,6 @@ HoverInfo::PrintedType printType(const TemplateTemplateParmDecl *TTP, // FIXME: TemplateTemplateParameter doesn't store the info on whether this // param was a "typename" or "class". 
OS << "> class"; - OS.flush(); return Result; } @@ -821,7 +818,6 @@ std::string typeAsDefinition(const HoverInfo::PrintedType &PType) { OS << PType.Type; if (PType.AKA) OS << " // aka: " << *PType.AKA; - OS.flush(); return Result; } diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 84e8fec342829..1fe534d78daec 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -913,7 +913,6 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName, PP.PatchedMarks = std::move(ModifiedScan->Marks); PP.PatchedMacros = std::move(ModifiedScan->Macros); dlog("Created preamble patch: {0}", Patch.str()); - Patch.flush(); return PP; } diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp index 7371d95fbf275..c1ab63fb22f61 100644 --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -554,7 +554,6 @@ std::string sortText(float Score, llvm::StringRef Name) { llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, /*Width=*/2 * sizeof(Score)); OS << Name; - OS.flush(); return S; } diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 3af99b9db056d..780aaa471dc8b 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -814,8 +814,8 @@ llvm::SmallVector ancestorNamespaces(llvm::StringRef NS) { // Checks whether \p FileName is a valid spelling of main file. bool isMainFile(llvm::StringRef FileName, const SourceManager &SM) { - auto FE = SM.getFileManager().getFile(FileName); - return FE && *FE == SM.getFileEntryForID(SM.getMainFileID()); + auto FE = SM.getFileManager().getOptionalFileRef(FileName); + return FE && FE == SM.getFileEntryRefForID(SM.getMainFileID()); } } // namespace diff --git a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp index d4b9b173d149d..c1c2e9fab9664 100644 --- a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp +++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp @@ -483,7 +483,6 @@ std::string convertGlobToRegex(llvm::StringRef Glob) { } } RegStream << '$'; - RegStream.flush(); return RegText; } diff --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp index 921ab5d1c96d5..d34838a45048d 100644 --- a/clang-tools-extra/clangd/index/StdLib.cpp +++ b/clang-tools-extra/clangd/index/StdLib.cpp @@ -87,7 +87,6 @@ std::string buildUmbrella(llvm::StringLiteral Mandatory, "#endif\n", Header); } - OS.flush(); return Result; } diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp index 4bb76cd6ab830..6ee641caeefe3 100644 --- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp +++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp @@ -397,10 +397,10 @@ TEST(ParsedASTTest, PatchesAdditionalIncludes) { auto &FM = SM.getFileManager(); // Copy so that we can use operator[] to get the children. 
IncludeStructure Includes = PatchedAST->getIncludeStructure(); - auto MainFE = FM.getFile(testPath("foo.cpp")); + auto MainFE = FM.getOptionalFileRef(testPath("foo.cpp")); ASSERT_TRUE(MainFE); auto MainID = Includes.getID(*MainFE); - auto AuxFE = FM.getFile(testPath("sub/aux.h")); + auto AuxFE = FM.getOptionalFileRef(testPath("sub/aux.h")); ASSERT_TRUE(AuxFE); auto AuxID = Includes.getID(*AuxFE); EXPECT_THAT(Includes.IncludeChildren[*MainID], Contains(*AuxID)); diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp index 2f82ec7444d7a..15158d8a45ca8 100644 --- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp +++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp @@ -242,7 +242,6 @@ const NamedDecl &findDeclWithTemplateArgs(ParsedAST &AST, // Use getNameForDiagnostic() which includes the template // arguments in the printed name. ND.getNameForDiagnostic(OS, Policy, /*Qualified=*/true); - OS.flush(); return QName == Query; }); } diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 8f7b0b5333f3a..9a130a23b6e89 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -130,6 +130,11 @@ Changes in existing checks usages of ``sizeof()``, ``alignof()``, and ``offsetof()`` when adding or subtracting from a pointer. +- Improved :doc:`bugprone-unchecked-optional-access + <clang-tidy/checks/bugprone/unchecked-optional-access>` to support + `bsl::optional` and `bdlb::NullableValue` from + <https://github.com/bloomberg/bde>_. + - Improved :doc:`cert-flp30-c <clang-tidy/checks/cert/flp30-c>` check to fix false positive that floating point variable is only used in increment expression. @@ -161,6 +166,10 @@ Changes in existing checks a false positive when only an implicit conversion happened inside an initializer list. +- Improved :doc:`modernize-use-nullptr + <clang-tidy/checks/modernize/use-nullptr>` check to also recognize + ``NULL``/``__null`` (but not ``0``) when used with a templated type. + - Improved :doc:`modernize-use-std-print <clang-tidy/checks/modernize/use-std-print>` check to support replacing member function calls too. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst index 5a6aaa077d9bf..97fe37b535356 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst @@ -8,9 +8,10 @@ results. Therefore, it may be more resource intensive (RAM, CPU) than the average clang-tidy check. This check identifies unsafe accesses to values contained in -``std::optional``, ``absl::optional``, ``base::Optional``, or -``folly::Optional`` objects. Below we will refer to all these types -collectively as ``optional``. +``std::optional``, ``absl::optional``, ``base::Optional``, +``folly::Optional``, ``bsl::optional``, or +``BloombergLP::bdlb::NullableValue`` objects. Below we will refer to all these +types collectively as ``optional``. An access to the value of an ``optional`` occurs when one of its ``value``, ``operator*``, or ``operator->`` member functions is invoked.
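As a hedged illustration of what the widened check now flags (using the `bsl::optional` mock this patch adds under the test inputs; real code would include BDE's own header):

    #include "bde/types/bsl_optional.h" // test-input mock of BDE's bsl::optional

    int deref(const bsl::optional<int> &O) {
      if (O.has_value())
        return *O;      // OK: the access is guarded
      return O.value(); // bugprone-unchecked-optional-access warns here
    }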
To align with diff --git a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp index c5fc465ced7a7..84e02e1d0d621 100644 --- a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp @@ -60,7 +60,7 @@ class FindHeadersTest : public testing::Test { llvm::SmallVector> findHeaders(llvm::StringRef FileName) { return include_cleaner::findHeaders( AST->sourceManager().translateFileLineCol( - AST->fileManager().getFile(FileName).get(), + *AST->fileManager().getOptionalFileRef(FileName), /*Line=*/1, /*Col=*/1), AST->sourceManager(), &PI); } diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index 715d95eb57346..b5a7b9720903e 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -234,7 +234,7 @@ TEST_F(RecordPPTest, CapturesMacroRefs) { const auto &SM = AST.sourceManager(); SourceLocation Def = SM.getComposedLoc( - SM.translateFile(AST.fileManager().getFile("header.h").get()), + SM.translateFile(*AST.fileManager().getOptionalFileRef("header.h")), Header.point("def")); ASSERT_THAT(Recorded.MacroReferences, Not(IsEmpty())); Symbol OrigX = Recorded.MacroReferences.front().Target; @@ -368,29 +368,29 @@ TEST_F(PragmaIncludeTest, IWYUKeep) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("normal.h").get())); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("std/vector").get())); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("normal.h"))); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("std/vector"))); // Keep - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep1.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep2.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep3.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep4.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep5.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep6.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/map").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep1.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep2.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep3.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep4.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep5.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep6.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("std/map"))); // Exports - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export1.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export2.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export3.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/set").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export1.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export2.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export3.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("std/set"))); } TEST_F(PragmaIncludeTest, AssociatedHeader) { createEmptyFiles({"foo/main.h", "bar/main.h", "bar/other.h", "std/vector"}); auto IsKeep = [&](llvm::StringRef Name, TestAST &AST) { - return PI.shouldKeep(AST.fileManager().getFile(Name).get()); + return PI.shouldKeep(*AST.fileManager().getOptionalFileRef(Name)); }; Inputs.FileName = "main.cc"; @@ -452,19 +452,19 
@@ TEST_F(PragmaIncludeTest, IWYUPrivate) { // IWYU pragma: private )cpp"; TestAST Processed = build(); - auto PrivateFE = Processed.fileManager().getFile("private.h"); + auto PrivateFE = Processed.fileManager().getOptionalFileRef("private.h"); assert(PrivateFE); - EXPECT_TRUE(PI.isPrivate(PrivateFE.get())); - EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public2.h\""); + EXPECT_TRUE(PI.isPrivate(*PrivateFE)); + EXPECT_EQ(PI.getPublic(*PrivateFE), "\"public2.h\""); - auto PublicFE = Processed.fileManager().getFile("public.h"); + auto PublicFE = Processed.fileManager().getOptionalFileRef("public.h"); assert(PublicFE); - EXPECT_EQ(PI.getPublic(PublicFE.get()), ""); // no mapping. - EXPECT_FALSE(PI.isPrivate(PublicFE.get())); + EXPECT_EQ(PI.getPublic(*PublicFE), ""); // no mapping. + EXPECT_FALSE(PI.isPrivate(*PublicFE)); - auto Private2FE = Processed.fileManager().getFile("private2.h"); + auto Private2FE = Processed.fileManager().getOptionalFileRef("private2.h"); assert(Private2FE); - EXPECT_TRUE(PI.isPrivate(Private2FE.get())); + EXPECT_TRUE(PI.isPrivate(*Private2FE)); } TEST_F(PragmaIncludeTest, IWYUExport) { @@ -486,13 +486,13 @@ TEST_F(PragmaIncludeTest, IWYUExport) { const auto &SM = Processed.sourceManager(); auto &FM = Processed.fileManager(); - EXPECT_THAT(PI.getExporters(FM.getFile("private.h").get(), FM), + EXPECT_THAT(PI.getExporters(*FM.getOptionalFileRef("private.h"), FM), testing::UnorderedElementsAre(FileNamed("export1.h"), FileNamed("export3.h"))); - EXPECT_TRUE(PI.getExporters(FM.getFile("export1.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("export2.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("export3.h").get(), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export1.h"), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export2.h"), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export3.h"), FM).empty()); EXPECT_TRUE( PI.getExporters(SM.getFileEntryForID(SM.getMainFileID()), FM).empty()); } @@ -546,26 +546,25 @@ TEST_F(PragmaIncludeTest, IWYUExportBlock) { for (auto &FE : FEs) { OS << FE.getName() << " "; } - OS.flush(); return Result; }; - auto Exporters = PI.getExporters(FM.getFile("private1.h").get(), FM); + auto Exporters = PI.getExporters(*FM.getOptionalFileRef("private1.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"), FileNamed("normal.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("private2.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("private2.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("private3.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("private3.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("foo.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("foo.h"), FM); EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("bar.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("bar.h"), FM); EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); } @@ -581,8 +580,8 @@ TEST_F(PragmaIncludeTest, SelfContained) { Inputs.ExtraFiles["unguarded.h"] = ""; TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_TRUE(PI.isSelfContained(FM.getFile("guarded.h").get())); 
- EXPECT_FALSE(PI.isSelfContained(FM.getFile("unguarded.h").get())); + EXPECT_TRUE(PI.isSelfContained(*FM.getOptionalFileRef("guarded.h"))); + EXPECT_FALSE(PI.isSelfContained(*FM.getOptionalFileRef("unguarded.h"))); } TEST_F(PragmaIncludeTest, AlwaysKeep) { @@ -597,8 +596,8 @@ TEST_F(PragmaIncludeTest, AlwaysKeep) { Inputs.ExtraFiles["usual.h"] = "#pragma once"; TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("always_keep.h").get())); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("usual.h").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("always_keep.h"))); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("usual.h"))); } TEST_F(PragmaIncludeTest, ExportInUnnamedBuffer) { @@ -654,13 +653,13 @@ TEST_F(PragmaIncludeTest, OutlivesFMAndSM) { // Now this build gives us a new File&Source Manager. TestAST Processed = build(/*ResetPragmaIncludes=*/false); auto &FM = Processed.fileManager(); - auto PrivateFE = FM.getFile("private.h"); + auto PrivateFE = FM.getOptionalFileRef("private.h"); assert(PrivateFE); - EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public.h\""); + EXPECT_EQ(PI.getPublic(*PrivateFE), "\"public.h\""); - auto Private2FE = FM.getFile("private2.h"); + auto Private2FE = FM.getOptionalFileRef("private2.h"); assert(Private2FE); - EXPECT_THAT(PI.getExporters(Private2FE.get(), FM), + EXPECT_THAT(PI.getExporters(*Private2FE, FM), testing::ElementsAre(llvm::cantFail(FM.getFileRef("public.h")))); } @@ -677,8 +676,8 @@ TEST_F(PragmaIncludeTest, CanRecordManyTimes) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - auto PrivateFE = FM.getFile("private.h"); - llvm::StringRef Public = PI.getPublic(PrivateFE.get()); + auto PrivateFE = FM.getOptionalFileRef("private.h"); + llvm::StringRef Public = PI.getPublic(*PrivateFE); EXPECT_EQ(Public, "\"public.h\""); // This build populates same PI during build, but this time we don't have diff --git a/clang-tools-extra/modularize/Modularize.cpp b/clang-tools-extra/modularize/Modularize.cpp index 2c00c76c85533..4bb3bae0503ac 100644 --- a/clang-tools-extra/modularize/Modularize.cpp +++ b/clang-tools-extra/modularize/Modularize.cpp @@ -621,7 +621,6 @@ class CollectEntitiesVisitor std::string Name; llvm::raw_string_ostream OS(Name); ND->printQualifiedName(OS); - OS.flush(); if (Name.empty()) return true; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h new file mode 100644 index 0000000000000..4411bcfd60a74 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h @@ -0,0 +1,38 @@ +#ifndef LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_ +#define LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_ + +#include "bsl_optional.h" + +/// Mock of `bdlb::NullableValue`. 
+namespace BloombergLP::bdlb { + +template <typename T> +class NullableValue : public bsl::optional<T> { +public: + constexpr NullableValue() noexcept; + + constexpr NullableValue(bsl::nullopt_t) noexcept; + + NullableValue(const NullableValue &) = default; + + NullableValue(NullableValue &&) = default; + + const T &value() const &; + T &value() &; + + // 'operator bool' is inherited from bsl::optional + + constexpr bool isNull() const noexcept; + + template <typename U> + constexpr T valueOr(U &&v) const &; + + // 'reset' is inherited from bsl::optional + + template <typename U> NullableValue &operator=(const U &u); +}; + + +} // namespace BloombergLP::bdlb + +#endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_ diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h new file mode 100644 index 0000000000000..7e1a129e04a55 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h @@ -0,0 +1,75 @@ +#ifndef LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_ +#define LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_ + +/// Mock of `bsl::optional`. +namespace bsl { + +// clang-format off +template <typename T> struct remove_reference { using type = T; }; +template <typename T> struct remove_reference<T &> { using type = T; }; +template <typename T> struct remove_reference<T &&> { using type = T; }; +// clang-format on + +template <typename T> +using remove_reference_t = typename remove_reference<T>::type; + +template <typename T> +constexpr T &&forward(remove_reference_t<T> &t) noexcept; + +template <typename T> +constexpr T &&forward(remove_reference_t<T> &&t) noexcept; + +template <typename T> +constexpr remove_reference_t<T> &&move(T &&x); + +struct nullopt_t { + constexpr explicit nullopt_t() {} +}; + +constexpr nullopt_t nullopt; + +template <typename T> +class optional { +public: + constexpr optional() noexcept; + + constexpr optional(nullopt_t) noexcept; + + optional(const optional &) = default; + + optional(optional &&) = default; + + const T &operator*() const &; + T &operator*() &; + const T &&operator*() const &&; + T &&operator*() &&; + + const T *operator->() const; + T *operator->(); + + const T &value() const &; + T &value() &; + const T &&value() const &&; + T &&value() &&; + + constexpr explicit operator bool() const noexcept; + constexpr bool has_value() const noexcept; + + template <typename U> + constexpr T value_or(U &&v) const &; + template <typename U> + T value_or(U &&v) &&; + + template <typename... Args> + T &emplace(Args &&...args); + + void reset() noexcept; + + void swap(optional &rhs) noexcept; + + template <typename U> optional &operator=(const U &u); +}; + +} // namespace bsl + +#endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_ diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp index 13a3ff52f3ebc..3167b85f0e024 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp @@ -2,6 +2,8 @@ #include "absl/types/optional.h" #include "folly/types/Optional.h" +#include "bde/types/bsl_optional.h" +#include "bde/types/bdlb_nullablevalue.h" void unchecked_value_access(const absl::optional<int> &opt) { opt.value(); @@ -50,6 +52,95 @@ void folly_checked_access(const folly::Optional<int> &opt) { } } +void bsl_optional_unchecked_value_access(const bsl::optional<int> &opt) { + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + int x = *opt; + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (!opt) { + return; + } + + opt.value(); + x = *opt; +} + +void bsl_optional_checked_access(const bsl::optional<int> &opt) { + if (opt.has_value()) { + opt.value(); + } + if (opt) { + opt.value(); + } +} + +void bsl_optional_value_after_swap(bsl::optional<int> &opt1, bsl::optional<int> &opt2) { + if (opt1) { + opt1.swap(opt2); + opt1.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: unchecked access to optional value + } +} + +void nullable_value_unchecked_value_access(const BloombergLP::bdlb::NullableValue<int> &opt) { + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + int x = *opt; + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (opt.isNull()) { + opt.value(); + } + // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (!opt) { + opt.value(); + } + // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (!opt) { + return; + } + + opt.value(); + x = *opt; +} + +void nullable_value_optional_checked_access(const BloombergLP::bdlb::NullableValue<int> &opt) { + if (opt.has_value()) { + opt.value(); + } + if (opt) { + opt.value(); + } + if (!opt.isNull()) { + opt.value(); + } +} + +void nullable_value_emplaced(BloombergLP::bdlb::NullableValue<int> &opt) { + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + opt.emplace(1); + opt.value(); + + opt.reset(); + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] +} + +void nullable_value_after_swap(BloombergLP::bdlb::NullableValue<int> &opt1, BloombergLP::bdlb::NullableValue<int> &opt2) { + if (opt1) { + opt1.swap(opt2); + opt1.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: unchecked access to optional value + } +} + template <typename T> void function_template_without_user(const absl::optional<T> &opt) { opt.value(); // no-warning diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp index 7bc0925136aa8..2c36349da896c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp @@ -84,6 +84,31 @@ void test_macro_expansion4() { #undef MY_NULL } +template <typename T> struct pear { + // If you say __null (or NULL), we assume that T will always be a pointer + // type, so we suggest replacing it with nullptr. (We only check __null here, + // because in this test NULL is defined as 0, but in real library + // implementations it is often defined as __null and the check will catch it.) + void f() { x = __null; } + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use nullptr [modernize-use-nullptr] + // CHECK-FIXES: x = nullptr; + + // But if you say 0, we allow the possibility that T can be used with integral + // and pointer types, and "0" is an acceptable initializer (even if "{}" might + // be even better). + void g() { y = 0; } + // CHECK-MESSAGES-NOT: :[[@LINE-1]] warning: use nullptr + + T x; + T y; +}; +void test_templated() { + pear<int *> p; + p.f(); + p.g(); + dummy(p.x); +} + #define IS_EQ(x, y) if (x != y) return; void test_macro_args() { int i = 0; diff --git a/clang-tools-extra/unittests/include/common/VirtualFileHelper.h b/clang-tools-extra/unittests/include/common/VirtualFileHelper.h index 18b98d2796e67..abe1067495694 100644 --- a/clang-tools-extra/unittests/include/common/VirtualFileHelper.h +++ b/clang-tools-extra/unittests/include/common/VirtualFileHelper.h @@ -60,7 +60,7 @@ class VirtualFileHelper { I != E; ++I) { std::unique_ptr<llvm::MemoryBuffer> Buf = llvm::MemoryBuffer::getMemBuffer(I->Code); - const FileEntry *Entry = SM.getFileManager().getVirtualFile( + FileEntryRef Entry = SM.getFileManager().getVirtualFileRef( I->FileName, Buf->getBufferSize(), /*ModificationTime=*/0); SM.overrideFileContents(Entry, std::move(Buf)); } diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e511614fcf245..14907e7db18de 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -43,7 +43,7 @@ code bases. still supporting SPARC V8 CPUs need to specify ``-mcpu=v8`` with a `config file <https://clang.llvm.org/docs/UsersManual.html#configuration-files>`_. - + - The ``clang-rename`` tool has been removed. C/C++ Language Potentially Breaking Changes @@ -115,7 +115,7 @@ C++ Language Changes - Allow single element access of GCC vector/ext_vector_type object to be constant expression. Supports the `V.xyzw` syntax and other tidbits as seen in OpenCL. Selecting multiple elements is left as a future work. -- Implement `CWG1815 <https://wg21.link/CWG1815>`_. Support lifetime extension +- Implement `CWG1815 <https://wg21.link/CWG1815>`_. Support lifetime extension of temporary created by aggregate initialization using a default member initializer. @@ -336,6 +336,8 @@ Improvements to Clang's diagnostics local variables passed to function calls using the ``[[clang::musttail]]`` attribute. +- Clang now diagnoses cases where a dangling ``GSLOwner<GSLPointer>`` object is constructed, e.g. ``std::vector<std::string_view> v = {std::string()};`` (#GH100526). + Improvements to Clang's time-trace ---------------------------------- @@ -452,6 +454,9 @@ Miscellaneous Clang Crashes Fixed - Fixed ``-ast-dump`` crashes on codes involving ``concept`` with ``-ast-dump-decl-types``. (#GH94928) +- Fixed internal assertion firing when a declaration in the implicit global + module is found through ADL. (#GH109879) + OpenACC Specific Changes ------------------------ diff --git a/clang/include/clang/AST/PropertiesBase.td b/clang/include/clang/AST/PropertiesBase.td index 9b934b20cf255..3057669e3758b 100644 --- a/clang/include/clang/AST/PropertiesBase.td +++ b/clang/include/clang/AST/PropertiesBase.td @@ -39,7 +39,7 @@ class EnumPropertyType : PropertyType {} /// Supports optional values by using the null representation. class RefPropertyType : PropertyType { let PackOptional = - "value ? *value : nullptr"; + "value.value_or(nullptr)"; let UnpackOptional = "value ? std::optional<" # CXXName # ">(value) : std::nullopt"; } diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index f23a148e546fa..53d88482698f0 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6696,6 +6696,20 @@ When the Owner's lifetime ends, it will consider the Pointer to be dangling. P.getInt(); // P is dangling } +If a template class is annotated with ``[[gsl::Owner]]``, and the first +instantiated template argument is a pointer type (raw pointer, or ``[[gsl::Pointer]]``), +the analysis will consider the instantiated class as a container of the pointer. +When constructing such an object from a GSL owner object, the analysis will +assume that the container holds a pointer to the owner object. Consequently, +when the owner object is destroyed, the pointer will be considered dangling. + +.. code-block:: c++ + + int f() { + std::vector<std::string_view> v = {std::string()}; // v holds a dangling pointer. + std::optional<std::string_view> o = std::string(); // o holds a dangling pointer. + } + }]; } diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index b600198998d85..2893377e5a38b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -96,6 +96,7 @@ CODEGENOPT(EmulatedTLS , 1, 0) ///< Set by default or -f[no-]emulated-tls. ENUM_CODEGENOPT(EmbedBitcode, EmbedBitcodeKind, 2, Embed_Off) /// Inline asm dialect, -masm=(att|intel) ENUM_CODEGENOPT(InlineAsmDialect, InlineAsmDialectKind, 1, IAD_ATT) +CODEGENOPT(OutputAsmVariant, 2, 3) ///< Set the asm variant for output (3: unspecified). CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables ///< are required. CODEGENOPT(FunctionSections , 1, 0) ///< Set when -ffunction-sections is enabled. diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 292e4af1b3b30..a6b17ccb6799d 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -109,8 +109,6 @@ def err_fe_expected_clang_command : Error< "expected a clang compiler command">; def err_fe_remap_missing_to_file : Error< "could not remap file '%0' to the contents of file '%1'">, DefaultFatal; -def err_fe_remap_missing_from_file : Error< - "could not remap from missing file '%0'">, DefaultFatal; def err_fe_unable_to_load_pch : Error< "unable to load PCH file">; def err_fe_unable_to_load_plugin : Error< diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 527bbef24793e..ce4e8c1fbe16e 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -84,7 +84,7 @@ class FileManager : public RefCountedBase<FileManager> { /// VirtualDirectoryEntries/VirtualFileEntries above. /// llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator> - SeenDirEntries; + SeenDirEntries; /// A cache that maps paths to file entries (either real or /// virtual) we have looked up, or an error that occurred when we looked up @@ -190,6 +190,8 @@ class FileManager : public RefCountedBase<FileManager> { /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. + LLVM_DEPRECATED("Functions returning DirectoryEntry are deprecated.", + "getOptionalDirectoryRef()") llvm::ErrorOr<const DirectoryEntry *> getDirectory(StringRef DirName, bool CacheFailure = true); /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file.
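These deprecation annotations drive the mechanical change repeated across this patch: the deprecated lookups return llvm::ErrorOr<const FileEntry *> (unwrapped with .get()), while the replacements return OptionalFileEntryRef/FileEntryRef (checked, then dereferenced). A sketch of the call-site pattern, assuming a clang::FileManager &FM (names are illustrative, not from the patch):

    #include "clang/Basic/FileManager.h"

    void lookupExample(clang::FileManager &FM) {
      // Before: llvm::ErrorOr<const clang::FileEntry *>, unwrapped with .get():
      //   const clang::FileEntry *FE = FM.getFile("a.h").get();
      // After: an optional reference type that is tested, then dereferenced.
      if (clang::OptionalFileEntryRef FE = FM.getOptionalFileRef("a.h")) {
        llvm::StringRef Name = FE->getName(); // keeps the name it was looked up by
        (void)Name;
      }
    }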
+ LLVM_DEPRECATED("Functions returning FileEntry are deprecated.", + "getOptionalFileRef()") llvm::ErrorOr getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true); @@ -269,6 +273,8 @@ class FileManager : public RefCountedBase { FileEntryRef getVirtualFileRef(StringRef Filename, off_t Size, time_t ModificationTime); + LLVM_DEPRECATED("Functions returning FileEntry are deprecated.", + "getVirtualFileRef()") const FileEntry *getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime); @@ -293,7 +299,7 @@ class FileManager : public RefCountedBase { bool RequiresNullTerminator = true, std::optional MaybeLimit = std::nullopt) const { return getBufferForFileImpl(Filename, - /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), + /*FileSize=*/MaybeLimit.value_or(-1), isVolatile, RequiresNullTerminator); } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index edf73d9022b06..da496e30fbb52 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2418,6 +2418,16 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; } +// +// Multi-vector scaling +// +let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { + def FSCALE_SINGLE_X2 : Inst<"svscale[_single_{d}_x2]", "22x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x2", [IsStreaming],[]>; + def FSCALE_SINGLE_X4 : Inst<"svscale[_single_{d}_x4]", "44x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x4", [IsStreaming],[]>; + def FSCALE_X2 : Inst<"svscale[_{d}_x2]", "222.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x2", [IsStreaming],[]>; + def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>; +} + let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { // == BFloat16 multiply-subtract == def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, VerifyRuntimeMode], []>; diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index 5ef9602433697..6e57e51793a71 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -361,7 +361,11 @@ multiclass RVVNonTupleVCreateBuiltin src_lmul_list> { defvar src_s = FixedVString.S; def vcreate # src_v # dst_v : RVVBuiltin; + "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def vcreate_h # src_v # dst_v : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def vcreate_bf16 # src_v # dst_v : RVVBuiltin; + def vundefined : RVVBuiltin<"v", "v", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def vundefined_h : RVVBuiltin<"v", "v", "x">; let RequiredFeatures = ["Zvfbfmin"] in def vundefined_bf16 : RVVBuiltin<"v", "v", "y">; def vundefined_u : RVVBuiltin<"Uv", "Uv", "csil">; @@ -2482,7 +2488,9 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach nf = NFList in { let NF = nf in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; @@ -2502,7 +2510,10 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach dst_lmul = ["(SFixedLog2LMUL:-3)", "(SFixedLog2LMUL:-2)", "(SFixedLog2LMUL:-1)", "(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { def vlmul_trunc # dst_lmul : RVVBuiltin<"v" # 
dst_lmul # "v", - dst_lmul # "vv", "csilxfd", dst_lmul # "v">; + dst_lmul # "vv", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def vlmul_trunc_h # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", + dst_lmul # "vv", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def vlmul_trunc_bf16 # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "y", dst_lmul # "v">; @@ -2523,7 +2534,10 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach dst_lmul = ["(LFixedLog2LMUL:-2)", "(LFixedLog2LMUL:-1)", "(LFixedLog2LMUL:-0)", "(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { def vlmul_ext # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", - dst_lmul # "vv", "csilxfd", dst_lmul # "v">; + dst_lmul # "vv", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def vlmul_ext_h # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", + dst_lmul # "vv", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def vlmul_ext_bf16 # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "y", dst_lmul # "v">; @@ -2555,14 +2569,18 @@ let HasMasked = false, HasVL = false, IRName = "" in { } }] in { foreach dst_lmul = ["(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { - def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilxfd", dst_lmul # "v">; + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "y", dst_lmul # "v">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil", dst_lmul # "Uv">; } foreach nf = NFList in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; @@ -2592,14 +2610,18 @@ let HasMasked = false, HasVL = false, IRName = "" in { } }] in { foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { - def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">; + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "x">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "y">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">; } foreach nf = NFList in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "csilxfd">; + def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "x">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "y">; def : RVVBuiltin<"Uv" # T # "Uv", T # "Uv" # T # "UvKzUv", "csil">; @@ -2646,7 +2668,9 @@ let HasMasked = false, HasVL = false, IRName = "" in { defvar T = "(Tuple:" # nf # ")"; defvar V = VString.S; defvar UV = VString.S; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td index 002f60350543d..23bd686a85f52 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7217,6 +7217,9 @@ def fuse_ctor_homing: Flag<["-"], "fuse-ctor-homing">, def as_secure_log_file : Separate<["-"], "as-secure-log-file">, HelpText<"Emit .secure_log_unique directives to this filename.">, MarshallingInfoString<CodeGenOpts<"AsSecureLogFile">>; +def output_asm_variant : Joined<["--"], "output-asm-variant=">, + HelpText<"Select the asm variant (integer) to use for output (3: unspecified)">, + MarshallingInfoInt<CodeGenOpts<"OutputAsmVariant">, "3">; } // let Visibility = [CC1Option, CC1AsOption] @@ -8307,8 +8310,6 @@ def filetype : Separate<["-"], "filetype">, HelpText<"Specify the output file type ('asm', 'null', or 'obj')">; // Transliterate Options -def output_asm_variant : Separate<["-"], "output-asm-variant">, - HelpText<"Select the asm variant index to use for output">; def show_encoding : Flag<["-"], "show-encoding">, HelpText<"Show instruction encoding information in transliterate mode">; def show_inst : Flag<["-"], "show-inst">, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 4410df296d8ef..5be33ae0ed1b9 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -724,6 +724,12 @@ enum ASTRecordTypes { /// Record code for vtables to emit. VTABLES_TO_EMIT = 70, + + /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to + /// be loaded right after the function they belong to. It is required to have + /// a canonical declaration for the lambda class from the same module as the + /// enclosing function. + FUNCTION_DECL_TO_LAMBDAS_MAP = 71, }; /// Record types used within a source manager block. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 898f4392465fd..c1843218a4b8b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -532,6 +532,18 @@ class ASTReader /// namespace as if it is not delayed. DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; + /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// It is required to have the canonical declaration for the lambda class from + /// the same module as the enclosing function. This is required to correctly + /// resolve captured variables in the lambda. Without this, due to lazy + /// deserialization, canonical declarations for the function and lambdas can + /// be selected from different modules and DeclRefExprs may refer to AST + /// nodes that don't exist in the function. + llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID>> + FunctionToLambdasMap; + struct PendingUpdateRecord { Decl *D; GlobalDeclID ID; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 10a50b711043a..760866fd9de93 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,6 +233,14 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet PredefinedDecls; + /// Mapping from FunctionDecl to the list of lambda IDs inside the function. + /// + /// These lambdas have to be loaded right after the function they belong to, + /// in order to have a canonical declaration for the lambda class from the + /// same module as the enclosing function during deserialization. + llvm::DenseMap<const FunctionDecl *, SmallVector<LocalDeclID>> + FunctionToLambdasMap; + /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. std::vector<DeclOffset> DeclOffsets; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index 2f6cd481fd636..eef7a54f03bf1 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -326,14 +326,14 @@ class ProgramState : public llvm::FoldingSetNode { /// \param ITraits information about special handling for particular regions /// or symbols. [[nodiscard]] ProgramStateRef - invalidateRegions(ArrayRef<const MemRegion *> Regions, const Expr *E, + invalidateRegions(ArrayRef<const MemRegion *> Regions, const Stmt *S, unsigned BlockCount, const LocationContext *LCtx, bool CausesPointerEscape, InvalidatedSymbols *IS = nullptr, const CallEvent *Call = nullptr, RegionAndSymbolInvalidationTraits *ITraits = nullptr) const; [[nodiscard]] ProgramStateRef - invalidateRegions(ArrayRef<SVal> Values, const Expr *E, unsigned BlockCount, + invalidateRegions(ArrayRef<SVal> Values, const Stmt *S, unsigned BlockCount, const LocationContext *LCtx, bool CausesPointerEscape, InvalidatedSymbols *IS = nullptr, const CallEvent *Call = nullptr, diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h index 6eedaf0544559..ec2b2b2456948 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h @@ -202,11 +202,9 @@ class SValBuilder { const Expr *expr, const LocationContext *LCtx, unsigned count); - DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, - const Expr *expr, + DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, const Stmt *S, const LocationContext *LCtx, - QualType type, - unsigned count); + QualType type, unsigned count); DefinedOrUnknownSVal conjureSymbolVal(const Stmt *stmt, const LocationContext *LCtx, QualType type, diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h index e08d5e104e9c0..332855a3c9c45 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h @@ -215,7 +215,7 @@ class StoreManager { /// /// \param[in] store The initial store. /// \param[in] Values The values to invalidate. - /// \param[in] E The current statement being evaluated. Used to conjure + /// \param[in] S The current statement being evaluated. Used to conjure /// symbols to mark the values of invalidated regions. /// \param[in] Count The current block count. Used to conjure /// symbols to mark the values of invalidated regions. @@ -233,7 +233,7 @@ class StoreManager { /// even if they do not currently have bindings. Pass \c NULL if this /// information will not be used.
virtual StoreRef invalidateRegions( - Store store, ArrayRef<SVal> Values, const Expr *Ex, unsigned Count, + Store store, ArrayRef<SVal> Values, const Stmt *S, unsigned Count, const LocationContext *LCtx, const CallEvent *Call, InvalidatedSymbols &IS, RegionAndSymbolInvalidationTraits &ITraits, InvalidatedRegions *TopLevelRegions, InvalidatedRegions *Invalidated) = 0; diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp index 16fd59244086f..f72a1d65b5456 100644 --- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp +++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp @@ -757,8 +757,8 @@ class YAMLConverter { OutInfo.addTypeInfo(idx++, N); audited = Nullability.size() > 0 || ReturnNullability; if (audited) - OutInfo.addTypeInfo(0, ReturnNullability ? *ReturnNullability - : NullabilityKind::NonNull); + OutInfo.addTypeInfo(0, + ReturnNullability.value_or(NullabilityKind::NonNull)); if (!audited) return; OutInfo.NullabilityAudited = audited; diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index bba97e289da2e..60175f1ccb342 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -10020,8 +10020,8 @@ Expected<FileID> ASTImporter::Import(FileID FromID, bool IsBuiltin) { ToIncludeLocOrFakeLoc = ToSM.getLocForStartOfFile(ToSM.getMainFileID()); if (Cache->OrigEntry && Cache->OrigEntry->getDir()) { - // FIXME: We probably want to use getVirtualFile(), so we don't hit the - // disk again + // FIXME: We probably want to use getVirtualFileRef(), so we don't hit + // the disk again // FIXME: We definitely want to re-use the existing MemoryBuffer, rather // than mmap the files several times. auto Entry = diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 754cd0db9868b..e54b6568d7060 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -725,9 +725,7 @@ bool Compiler<Emitter>::VisitParenExpr(const ParenExpr *E) { template <class Emitter> bool Compiler<Emitter>::VisitBinaryOperator(const BinaryOperator *BO) { // Need short-circuiting for these. - if (BO->getType()->isVectorType()) - return this->VisitVectorBinOp(BO); - if (BO->isLogicalOp()) + if (BO->isLogicalOp() && !BO->getType()->isVectorType()) return this->VisitLogicalBinOp(BO); const Expr *LHS = BO->getLHS(); @@ -746,6 +744,8 @@ bool Compiler<Emitter>::VisitBinaryOperator(const BinaryOperator *BO) { if (BO->getType()->isAnyComplexType()) return this->VisitComplexBinOp(BO); + if (BO->getType()->isVectorType()) + return this->VisitVectorBinOp(BO); if ((LHS->getType()->isAnyComplexType() || RHS->getType()->isAnyComplexType()) && BO->isComparisonOp()) @@ -1264,6 +1264,8 @@ bool Compiler<Emitter>::VisitComplexBinOp(const BinaryOperator *E) { template <class Emitter> bool Compiler<Emitter>::VisitVectorBinOp(const BinaryOperator *E) { + assert(!E->isCommaOp() && + "Comma op should be handled in VisitBinaryOperator"); assert(E->getType()->isVectorType()); assert(E->getLHS()->getType()->isVectorType()); assert(E->getRHS()->getType()->isVectorType()); @@ -1282,9 +1284,8 @@ bool Compiler<Emitter>::VisitVectorBinOp(const BinaryOperator *E) { ? BinaryOperator::getOpForCompoundAssignment(E->getOpcode()) : E->getOpcode(); - // The LHS and RHS of a comparison operator must have the same type. So we - // just use LHS vector element type here. PrimType ElemT = this->classifyVectorElementType(LHS->getType()); + PrimType RHSElemT = this->classifyVectorElementType(RHS->getType()); PrimType ResultElemT = this->classifyVectorElementType(E->getType()); // Evaluate LHS and save value to LHSOffset. @@ -1312,7 +1313,7 @@ bool Compiler<Emitter>::VisitVectorBinOp(const BinaryOperator *E) { PrimType PromotT = classifyPrim(PromotTy); PrimType OpT = NeedIntPromot ? PromotT : ElemT; - auto getElem = [=](unsigned Offset, unsigned Index) { + auto getElem = [=](unsigned Offset, PrimType ElemT, unsigned Index) { if (!this->emitGetLocal(PT_Ptr, Offset, E)) return false; if (!this->emitArrayElemPop(ElemT, Index, E)) @@ -1342,9 +1343,9 @@ } for (unsigned I = 0; I != VecTy->getNumElements(); ++I) { - if (!getElem(LHSOffset, I)) + if (!getElem(LHSOffset, ElemT, I)) return false; - if (!getElem(RHSOffset, I)) + if (!getElem(RHSOffset, RHSElemT, I)) return false; switch (Op) { case BO_Add: @@ -1372,11 +1373,11 @@ case BO_Shl: - if (!this->emitShl(OpT, ElemT, E)) + if (!this->emitShl(OpT, RHSElemT, E)) return false; break; case BO_Shr: - if (!this->emitShr(OpT, ElemT, E)) + if (!this->emitShr(OpT, RHSElemT, E)) return false; break; case BO_EQ: diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 170203fe81877..05ece907af42f 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -389,12 +389,17 @@ Descriptor::Descriptor(const DeclTy &D) } QualType Descriptor::getType() const { - if (const auto *E = asExpr()) - return E->getType(); if (const auto *D = asValueDecl()) return D->getType(); - if (const auto *T = dyn_cast<TypeDecl>(asDecl())) + if (const auto *T = dyn_cast_if_present<TypeDecl>(asDecl())) return QualType(T->getTypeForDecl(), 0); + + // The Source sometimes has a different type than the one + // we really save. Try to consult the Record first. + if (isRecord()) + return QualType(ElemRecord->getDecl()->getTypeForDecl(), 0); + if (const auto *E = asExpr()) + return E->getType(); llvm_unreachable("Invalid descriptor type"); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 8b578ccbeb679..2f4a05a85753c 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1136,6 +1136,7 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, InterpFrame *FrameBefore = S.Current; S.Current = NewFrame.get(); + InterpStateCCOverride CCOverride(S, Func->getDecl()->isImmediateFunction()); APValue CallResult; // Note that we cannot assert(CallResult.hasValue()) here since // Ret() above only sets the APValue if the curent frame doesn't @@ -1296,10 +1297,6 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E, if (!InvalidNewDeleteExpr(S, OpPC, E)) return false; - // Assume proper types in std functions.
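A note on the RHSElemT change above: vector shift operands must agree in element count but not in element type, so classifying the RHS elements from the LHS type (as before) could mis-type the RHS. A hedged sketch in Clang's vector extensions (the type names are illustrative, not from the patch):

    typedef int v4si __attribute__((vector_size(16)));
    typedef unsigned int v4su __attribute__((vector_size(16)));

    // LHS elements are signed int, RHS elements unsigned int: the interpreter
    // has to classify each side separately to read the element values correctly.
    v4si shl(v4si Val, v4su Amt) { return Val << Amt; }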
- if (S.Current->isStdFunction()) - return true; - const auto *NewExpr = cast(E); QualType StorageType = Ptr.getType(); diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 1f4c302b26197..b029399a1554b 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2573,6 +2573,7 @@ inline bool ArrayElem(InterpState &S, CodePtr OpPC, uint32_t Index) { if (!CheckLoad(S, OpPC, Ptr)) return false; + assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name); S.Stk.push(Ptr.atIndex(Index).deref()); return true; } @@ -2584,6 +2585,7 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) { if (!CheckLoad(S, OpPC, Ptr)) return false; + assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name); S.Stk.push(Ptr.atIndex(Index).deref()); return true; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 68710f67be200..82ed6d9e7a2ff 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -136,16 +136,17 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result, static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { + unsigned Depth = S.Current->getDepth(); + auto isStdCall = [](const FunctionDecl *F) -> bool { + return F && F->isInStdNamespace() && F->getIdentifier() && + F->getIdentifier()->isStr("is_constant_evaluated"); + }; + const InterpFrame *Caller = Frame->Caller; // The current frame is the one for __builtin_is_constant_evaluated. // The one above that, potentially the one for std::is_constant_evaluated(). if (S.inConstantContext() && !S.checkingPotentialConstantExpression() && - Frame->Caller && S.getEvalStatus().Diag) { - auto isStdCall = [](const FunctionDecl *F) -> bool { - return F && F->isInStdNamespace() && F->getIdentifier() && - F->getIdentifier()->isStr("is_constant_evaluated"); - }; - const InterpFrame *Caller = Frame->Caller; - + S.getEvalStatus().Diag && + (Depth == 1 || (Depth == 2 && isStdCall(Caller->getCallee())))) { if (Caller->Caller && isStdCall(Caller->getCallee())) { const Expr *E = Caller->Caller->getExpr(Caller->getRetPC()); S.report(E->getExprLoc(), diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index 4ea05305540ee..287c3bd3bca3a 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -19,6 +19,13 @@ InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, Context &Ctx, SourceMapper *M) : Parent(Parent), M(M), P(P), Stk(Stk), Ctx(Ctx), Current(nullptr) {} +bool InterpState::inConstantContext() const { + if (ConstantContextOverride) + return *ConstantContextOverride; + + return Parent.InConstantContext; +} + InterpState::~InterpState() { while (Current) { InterpFrame *Next = Current->Caller; diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 4b7371450cc98..2a1311c86a2f2 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -77,7 +77,7 @@ class InterpState final : public State, public SourceMapper { bool noteUndefinedBehavior() override { return Parent.noteUndefinedBehavior(); } - bool inConstantContext() const { return Parent.InConstantContext; } + bool inConstantContext() const; bool hasActiveDiagnostic() override { return Parent.hasActiveDiagnostic(); } void setActiveDiagnostic(bool Flag) override { 
Parent.setActiveDiagnostic(Flag); @@ -116,6 +116,7 @@ class InterpState final : public State, public SourceMapper { private: friend class EvaluationResult; + friend class InterpStateCCOverride; /// AST Walker state. State &Parent; /// Dead block chain. @@ -124,6 +125,7 @@ class InterpState final : public State, public SourceMapper { SourceMapper *M; /// Allocator used for dynamic allocations performed via the program. DynamicAllocator Alloc; + std::optional ConstantContextOverride; public: /// Reference to the module containing all bytecode. @@ -144,6 +146,26 @@ class InterpState final : public State, public SourceMapper { SeenGlobalTemporaries; }; +class InterpStateCCOverride final { +public: + InterpStateCCOverride(InterpState &Ctx, bool Value) + : Ctx(Ctx), OldCC(Ctx.ConstantContextOverride) { + // We only override this if the new value is true. + Enabled = Value; + if (Enabled) + Ctx.ConstantContextOverride = Value; + } + ~InterpStateCCOverride() { + if (Enabled) + Ctx.ConstantContextOverride = OldCC; + } + +private: + bool Enabled; + InterpState &Ctx; + std::optional OldCC; +}; + } // namespace interp } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 0707aa662e4cc..70ffe92753e05 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -38,10 +38,25 @@ namespace clang { namespace dataflow { -static bool isTopLevelNamespaceWithName(const NamespaceDecl &NS, - llvm::StringRef Name) { - return NS.getDeclName().isIdentifier() && NS.getName() == Name && - NS.getParent() != nullptr && NS.getParent()->isTranslationUnit(); +// Note: the Names appear in reverse order. E.g., to check +// if NS is foo::bar::, call isFullyQualifiedNamespaceEqualTo(NS, "bar", "foo") +template +static bool isFullyQualifiedNamespaceEqualTo(const NamespaceDecl &NS, + llvm::StringRef Name, + NameTypes... Names) { + if (!(NS.getDeclName().isIdentifier() && NS.getName() == Name && + NS.getParent() != nullptr)) + return false; + + if constexpr (sizeof...(NameTypes) > 0) { + if (NS.getParent()->isTranslationUnit()) + return false; + if (const auto *NextNS = dyn_cast_or_null(NS.getParent())) + return isFullyQualifiedNamespaceEqualTo(*NextNS, Names...); + return false; + } else { + return NS.getParent()->isTranslationUnit(); + } } static bool hasOptionalClassName(const CXXRecordDecl &RD) { @@ -50,15 +65,23 @@ static bool hasOptionalClassName(const CXXRecordDecl &RD) { if (RD.getName() == "optional") { if (const auto *N = dyn_cast_or_null(RD.getDeclContext())) - return N->isStdNamespace() || isTopLevelNamespaceWithName(*N, "absl"); + return N->isStdNamespace() || + isFullyQualifiedNamespaceEqualTo(*N, "absl") || + isFullyQualifiedNamespaceEqualTo(*N, "bsl"); return false; } if (RD.getName() == "Optional") { // Check whether namespace is "::base" or "::folly". 
const auto *N = dyn_cast_or_null(RD.getDeclContext()); - return N != nullptr && (isTopLevelNamespaceWithName(*N, "base") || - isTopLevelNamespaceWithName(*N, "folly")); + return N != nullptr && (isFullyQualifiedNamespaceEqualTo(*N, "base") || + isFullyQualifiedNamespaceEqualTo(*N, "folly")); + } + + if (RD.getName() == "NullableValue") { + const auto *N = dyn_cast_or_null(RD.getDeclContext()); + return N != nullptr && + isFullyQualifiedNamespaceEqualTo(*N, "bdlb", "BloombergLP"); } return false; @@ -195,22 +218,25 @@ auto isOptionalOperatorCallWithName( } auto isMakeOptionalCall() { - return callExpr(callee(functionDecl(hasAnyName( - "std::make_optional", "base::make_optional", - "absl::make_optional", "folly::make_optional"))), - hasOptionalType()); + return callExpr( + callee(functionDecl(hasAnyName( + "std::make_optional", "base::make_optional", "absl::make_optional", + "folly::make_optional", "bsl::make_optional"))), + hasOptionalType()); } auto nulloptTypeDecl() { return namedDecl(hasAnyName("std::nullopt_t", "absl::nullopt_t", - "base::nullopt_t", "folly::None")); + "base::nullopt_t", "folly::None", + "bsl::nullopt_t")); } auto hasNulloptType() { return hasType(nulloptTypeDecl()); } auto inPlaceClass() { return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t", - "base::in_place_t", "folly::in_place_t")); + "base::in_place_t", "folly::in_place_t", + "bsl::in_place_t")); } auto isOptionalNulloptConstructor() { @@ -415,6 +441,15 @@ void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, } } +void transferOptionalIsNullCall(const CXXMemberCallExpr *CallExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (auto *HasValueVal = getHasValue( + State.Env, getImplicitObjectLocation(*CallExpr, State.Env))) { + State.Env.setValue(*CallExpr, State.Env.makeNot(*HasValueVal)); + } +} + /// `ModelPred` builds a logical formula relating the predicate in /// `ValueOrPredExpr` to the optional's `has_value` property. 
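// [Editorial aside, illustration only -- not part of this patch.] The
// transferOptionalIsNullCall above models bdlb::NullableValue::isNull() as
// the negation of the optional's has_value property, so a guarded access
// such as the following is understood by the model (BDE names assumed here
// for illustration):
//
//   int deref(const BloombergLP::bdlb::NullableValue<int> &V) {
//     if (!V.isNull())     // branch modeled with has_value == true
//       return V.value();  // no unchecked-access diagnostic
//     return 0;
//   }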
void transferValueOrImpl( @@ -784,6 +819,12 @@ auto buildTransferMatchSwitch() { isOptionalMemberCallWithNameMatcher(hasName("operator bool")), transferOptionalHasValueCall) + // NullableValue::isNull + // Only NullableValue has isNull + .CaseOfCFGStmt( + isOptionalMemberCallWithNameMatcher(hasName("isNull")), + transferOptionalIsNullCall) + // optional::emplace .CaseOfCFGStmt( isOptionalMemberCallWithNameMatcher(hasName("emplace")), diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 3c06d9bad1dc0..fa1bfec2aacb9 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/TargetID.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/TargetParser.h" diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp index 931f407ecb0d7..f4684765b7ffb 100644 --- a/clang/lib/Basic/Targets/BPF.cpp +++ b/clang/lib/Basic/Targets/BPF.cpp @@ -38,6 +38,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__BPF_FEATURE_ADDR_SPACE_CAST"); Builder.defineMacro("__BPF_FEATURE_MAY_GOTO"); + Builder.defineMacro("__BPF_FEATURE_ATOMIC_MEM_ORDERING"); if (CPU.empty()) CPU = "v3"; diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 37cf9d7921bac..8a26db7971cba 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -335,6 +335,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo { PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; + // SPIR-V has core support for atomic ops, and Int32 is always available; + // we take the maximum because it's possible the Host supports wider types. + MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 32); resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"); } @@ -356,6 +359,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo { PointerWidth = PointerAlign = 64; SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; + // SPIR-V has core support for atomic ops, and Int64 is always available; + // we take the maximum because it's possible the Host supports wider types. 
+    MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 64);
     resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
                     "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
   }
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index fa49763e312f1..916c92adb8930 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -509,6 +509,8 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
   Options.MCOptions.X86RelaxRelocations = CodeGenOpts.X86RelaxRelocations;
   Options.MCOptions.CompressDebugSections =
       CodeGenOpts.getCompressDebugSections();
+  if (CodeGenOpts.OutputAsmVariant != 3) // 3 (default): not specified
+    Options.MCOptions.OutputAsmVariant = CodeGenOpts.OutputAsmVariant;
   Options.MCOptions.ABIName = TargetOpts.ABI;
   for (const auto &Entry : HSOpts.UserEntries)
     if (!Entry.IsFramework &&
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbe9569e50ef6..a2a87e012b8b2 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -766,8 +766,19 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
   // LLVM atomic instructions always have synch scope. If clang atomic
   // expression has no scope operand, use default LLVM synch scope.
   if (!ScopeModel) {
+    llvm::SyncScope::ID SS;
+    if (CGF.getLangOpts().OpenCL)
+      // OpenCL approach is: "The functions that do not have memory_scope
+      // argument have the same semantics as the corresponding functions with
+      // the memory_scope argument set to memory_scope_device." See ref.:
+      // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
+      SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),
+                                                   SyncScope::OpenCLDevice,
+                                                   Order, CGF.getLLVMContext());
+    else
+      SS = llvm::SyncScope::System;
     EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
-                 Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
+                 Order, SS);
     return;
   }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 942468204f054..249aead33ad73 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -687,12 +687,31 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
   return CGF.Builder.CreateICmpSLT(V, Zero);
 }
 
+/// Checks that no arguments or results are passed indirectly in the ABI (i.e.
+/// via a hidden pointer). This is used to check that annotating FP libcalls
+/// (which could set `errno`) with "int" TBAA metadata is safe. If any
+/// floating-point arguments are passed indirectly, the setup for the call
+/// could be incorrectly optimized out.
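// [Editorial aside, illustration only -- not part of this patch.] An
// argument or result is "indirect" when the ABI passes it through a hidden
// pointer to a temporary rather than in registers. A hypothetical case the
// check below guards against:
//
//   struct BigFP { long double v[4]; }; // plausibly indirect on x86-64
//   double consume(BigFP);              // imagine an errno-setting libcall
//   double caller() {
//     BigFP Tmp = {{1.0L, 2.0L, 3.0L, 4.0L}}; // caller stores to the temp
//     return consume(Tmp); // "int" TBAA would claim the callee touches only
//   }                      // int memory, so those FP stores could be lost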
+static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) { + auto IsIndirect = [&](ABIArgInfo const &info) { + return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca(); + }; + return !IsIndirect(FnInfo.getReturnInfo()) && + llvm::none_of(FnInfo.arguments(), + [&](CGFunctionInfoArgInfo const &ArgInfo) { + return IsIndirect(ArgInfo.info); + }); +} + static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); + llvm::CallBase *callOrInvoke = nullptr; + CGFunctionInfo const *FnInfo = nullptr; RValue Call = - CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); + CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(), + /*Chain=*/nullptr, &callOrInvoke, &FnInfo); if (unsigned BuiltinID = FD->getBuiltinID()) { // Check whether a FP math builtin function, such as BI__builtin_expf @@ -702,12 +721,12 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, // Restrict to target with errno, for example, MacOS doesn't set errno. // TODO: Support builtin function with complex type returned, eg: cacosh if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno && - !CGF.Builder.getIsFPConstrained() && Call.isScalar()) { + !CGF.Builder.getIsFPConstrained() && Call.isScalar() && + HasNoIndirectArgumentsOrResults(*FnInfo)) { // Emit "int" TBAA metadata on FP math libcalls. clang::QualType IntTy = Context.IntTy; TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy); - Instruction *Inst = cast(Call.getScalarVal()); - CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); + CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo); } } return Call; @@ -1997,8 +2016,8 @@ struct CallObjCArcUse final : EHScopeStack::Cleanup { Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind) { - assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) - && "Unsupported builtin check kind"); + assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) && + "Unsupported builtin check kind"); Value *ArgValue = EmitScalarExpr(E); if (!SanOpts.has(SanitizerKind::Builtin)) @@ -2015,6 +2034,21 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, return ArgValue; } +Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) { + Value *ArgValue = EvaluateExprAsBool(E); + if (!SanOpts.has(SanitizerKind::Builtin)) + return ArgValue; + + SanitizerScope SanScope(this); + EmitCheck( + std::make_pair(ArgValue, SanitizerKind::Builtin), + SanitizerHandler::InvalidBuiltin, + {EmitCheckSourceLocation(E->getExprLoc()), + llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)}, + std::nullopt); + return ArgValue; +} + static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { return CGF.Builder.CreateBinaryIntrinsic( Intrinsic::abs, ArgValue, @@ -3428,7 +3462,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (E->getArg(0)->HasSideEffects(getContext())) return RValue::get(nullptr); - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0)); Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); Builder.CreateCall(FnAssume, ArgValue); return RValue::get(nullptr); @@ -9846,6 +9880,22 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, return C; } +Value 
*CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple, + llvm::StructType *Ty) { + if (PredTuple->getType() == Ty) + return PredTuple; + + Value *Ret = llvm::PoisonValue::get(Ty); + for (unsigned I = 0; I < Ty->getNumElements(); ++I) { + Value *Pred = Builder.CreateExtractValue(PredTuple, I); + Pred = EmitSVEPredicateCast( + Pred, cast(Ty->getTypeAtIndex(I))); + Ret = Builder.CreateInsertValue(Ret, Pred, I); + } + + return Ret; +} + Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -10352,41 +10402,6 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, return Tuple; } -Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { - // Multi-vector results should be broken up into a single (wide) result - // vector. - auto *StructTy = dyn_cast(Call->getType()); - if (!StructTy) - return Call; - - auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U)); - if (!VTy) - return Call; - unsigned N = StructTy->getNumElements(); - - // We may need to emit a cast to a svbool_t - bool IsPredTy = VTy->getElementType()->isIntegerTy(1); - unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements(); - - ScalableVectorType *WideVTy = - ScalableVectorType::get(VTy->getElementType(), MinElts * N); - Value *Ret = llvm::PoisonValue::get(WideVTy); - for (unsigned I = 0; I < N; ++I) { - Value *SRet = Builder.CreateExtractValue(Call, I); - assert(SRet->getType() == VTy && "Unexpected type for result value"); - Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); - - if (IsPredTy) - SRet = EmitSVEPredicateCast( - SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); - - Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); - } - Call = Ret; - - return Call; -} - void CodeGenFunction::GetAArch64SVEProcessedOperands( unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, SVETypeFlags TypeFlags) { @@ -10517,12 +10532,16 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, getSVEOverloadTypes(TypeFlags, Ty, Ops)); Value *Call = Builder.CreateCall(F, Ops); + if (Call->getType() == Ty) + return Call; + // Predicate results must be converted to svbool_t. - if (auto PredTy = dyn_cast(Call->getType())) - if (PredTy->getScalarType()->isIntegerTy(1)) - Call = EmitSVEPredicateCast(Call, cast(Ty)); + if (auto PredTy = dyn_cast(Ty)) + return EmitSVEPredicateCast(Call, PredTy); + if (auto PredTupleTy = dyn_cast(Ty)) + return EmitSVEPredicateTupleCast(Call, PredTupleTy); - return FormSVEBuiltinResult(Call); + llvm_unreachable("unsupported element count!"); } switch (BuiltinID) { @@ -10854,9 +10873,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, TypeFlags.isOverloadNone() ? 
CGM.getIntrinsic(Builtin->LLVMIntrinsic) : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); - Value *Call = Builder.CreateCall(F, Ops); - return FormSVEBuiltinResult(Call); + return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 2d2c280941bd6..4782e80f22177 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1249,8 +1249,12 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, CGM.getTarget().getDWARFAddressSpace( CGM.getTypes().getTargetAddressSpace(PointeeTy)); + const BTFTagAttributedType *BTFAttrTy; + if (auto *Atomic = PointeeTy->getAs()) + BTFAttrTy = dyn_cast(Atomic->getValueType()); + else + BTFAttrTy = dyn_cast(PointeeTy); SmallVector Annots; - auto *BTFAttrTy = dyn_cast(PointeeTy); while (BTFAttrTy) { StringRef Tag = BTFAttrTy->getAttr()->getBTFTypeTag(); if (!Tag.empty()) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 35b5daaf6d4b5..9166db4c74128 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5932,7 +5932,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Value *Chain, - llvm::CallBase **CallOrInvoke) { + llvm::CallBase **CallOrInvoke, + CGFunctionInfo const **ResolvedFnInfo) { // Get the actual function type. The callee type will always be a pointer to // function type or a block pointer type. assert(CalleeType->isFunctionPointerType() && @@ -6111,6 +6112,9 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( Args, FnType, /*ChainCall=*/Chain); + if (ResolvedFnInfo) + *ResolvedFnInfo = &FnInfo; + // C99 6.5.2.2p6: // If the expression that denotes the called function has a type // that does not include a prototype, [the default argument diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 623857b43a557..9bf15fca0de48 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -754,7 +754,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { const Expr *Assumption = cast(A)->getAssumption(); if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() && !Assumption->HasSideEffects(getContext())) { - llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption); + llvm::Value *AssumptionVal = EmitCheckedArgForAssume(Assumption); Builder.CreateAssumption(AssumptionVal); } } break; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 883333f0924dd..c9f9b688d0d8a 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -586,9 +586,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( if (D.isLocationAvailable()) { D.getLocation(Filename, Line, Column); if (Line > 0) { - auto FE = FileMgr.getFile(Filename); + auto FE = FileMgr.getOptionalFileRef(Filename); if (!FE) - FE = FileMgr.getFile(D.getAbsolutePath()); + FE = FileMgr.getOptionalFileRef(D.getAbsolutePath()); if (FE) { // If -gcolumn-info was not used, Column will be 0. This upsets the // source manager, so pass 1 if Column is not set. 
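[Editorial note -- illustration, not part of the patch.] Many hunks in this patch migrate FileManager::getFile() to getOptionalFileRef(). A minimal sketch of the difference, assuming the current clang::FileManager interface:

```cpp
#include "clang/Basic/FileManager.h"
#include "llvm/ADT/StringRef.h"

void lookup(clang::FileManager &FM, llvm::StringRef Path) {
  // Old style: llvm::ErrorOr<const clang::FileEntry *>. Collapses to the
  // canonical FileEntry, losing the spelling the file was requested under.
  if (auto FE = FM.getFile(Path))
    (void)(*FE)->getSize();

  // New style: clang::OptionalFileEntryRef. Tests and dereferences like an
  // optional, and FileEntryRef also remembers the as-requested name.
  if (auto FR = FM.getOptionalFileRef(Path))
    (void)FR->getName(); // path as it was looked up
}
```

The mechanical pattern is the same at every call site in this patch: the truthiness test and the `*Entry` dereference carry over, which is why the surrounding code is mostly untouched.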
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 2df17e83bae2e..3e2abbd9bc109 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4397,7 +4397,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   }
   RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E,
                   ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr,
-                  llvm::CallBase **CallOrInvoke = nullptr);
+                  llvm::CallBase **CallOrInvoke = nullptr,
+                  CGFunctionInfo const **ResolvedFnInfo = nullptr);
 
   // If a Call or Invoke instruction was emitted for this CallExpr, this method
   // writes the pointer to `CallOrInvoke` if it's not null.
@@ -4645,6 +4646,8 @@ class CodeGenFunction : public CodeGenTypeCache {
                                       unsigned BuiltinID);
   llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred,
                                     llvm::ScalableVectorType *VTy);
+  llvm::Value *EmitSVEPredicateTupleCast(llvm::Value *PredTuple,
+                                         llvm::StructType *Ty);
   llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
                                  llvm::SmallVectorImpl &Ops,
                                  unsigned IntID);
@@ -4669,12 +4672,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
                                   SmallVectorImpl &Ops,
                                   unsigned IntID);
-  /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider
-  /// vector. It extracts the scalable vector from the struct and inserts into
-  /// the wider vector. This avoids the error when allocating space in llvm
-  /// for struct of scalable vectors if a function returns struct.
-  llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
-
   llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
@@ -5084,12 +5081,17 @@ class CodeGenFunction : public CodeGenTypeCache {
   enum BuiltinCheckKind {
     BCK_CTZPassedZero,
     BCK_CLZPassedZero,
+    BCK_AssumePassedFalse,
   };
 
   /// Emits an argument for a call to a builtin. If the builtin sanitizer is
   /// enabled, a runtime check specified by \p Kind is also emitted.
   llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind);
 
+  /// Emits an argument for a call to `__builtin_assume`. If the builtin
+  /// sanitizer is enabled, a runtime check is also emitted.
+  llvm::Value *EmitCheckedArgForAssume(const Expr *E);
+
   /// Emit a description of a type in a format suitable for passing to
   /// a runtime sanitizer handler.
llvm::Constant *EmitCheckTypeDescriptor(QualType T); diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index cc52925e2e523..d5e8e4f7a5916 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -58,7 +58,36 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {} void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const override; }; + +inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) { + switch (Scope) { + case SyncScope::HIPSingleThread: + case SyncScope::SingleScope: + return "singlethread"; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + case SyncScope::WavefrontScope: + return "subgroup"; + case SyncScope::HIPWorkgroup: + case SyncScope::OpenCLWorkGroup: + case SyncScope::WorkgroupScope: + return "workgroup"; + case SyncScope::HIPAgent: + case SyncScope::OpenCLDevice: + case SyncScope::DeviceScope: + return "device"; + case SyncScope::SystemScope: + case SyncScope::HIPSystem: + case SyncScope::OpenCLAllSVMDevices: + return ""; + } + return ""; +} } // End anonymous namespace. void CommonSPIRABIInfo::setCCs() { @@ -188,6 +217,13 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( } } +llvm::SyncScope::ID +SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope, + llvm::AtomicOrdering, + llvm::LLVMContext &Ctx) const { + return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope)); +} + /// Construct a SPIR-V target extension type for the given OpenCL image type. static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType, StringRef OpenCLName, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 44548fa9d706f..d0c8bdba0ede9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1005,6 +1005,17 @@ static void appendOneArg(InputArgList &Args, const Arg *Opt, Copy->setOwnsValues(Opt->getOwnsValues()); Opt->setOwnsValues(false); Args.append(Copy); + if (Opt->getAlias()) { + const Arg *Alias = Opt->getAlias(); + unsigned Index = Args.MakeIndex(Alias->getSpelling()); + auto AliasCopy = std::make_unique(Alias->getOption(), + Args.getArgString(Index), Index); + AliasCopy->getValues() = Alias->getValues(); + AliasCopy->setOwnsValues(false); + if (Alias->isClaimed()) + AliasCopy->claim(); + Copy->setAlias(std::move(AliasCopy)); + } } bool Driver::readConfigFile(StringRef FileName, diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index aaa4fdf03be1e..687a38333e128 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -1192,7 +1192,7 @@ Error OffloadBundler::ListBundleIDsInFile( StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) { // Open Input file. 
ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(InputFileName); + MemoryBuffer::getFileOrSTDIN(InputFileName, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(InputFileName, EC); @@ -1324,7 +1324,7 @@ Error OffloadBundler::BundleFiles() { InputBuffers.reserve(BundlerConfig.InputFileNames.size()); for (auto &I : BundlerConfig.InputFileNames) { ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(I); + MemoryBuffer::getFileOrSTDIN(I, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(I, EC); InputBuffers.emplace_back(std::move(*CodeOrErr)); @@ -1392,7 +1392,8 @@ Error OffloadBundler::BundleFiles() { Error OffloadBundler::UnbundleFiles() { // Open Input file. ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front()); + MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front(), + /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(BundlerConfig.InputFileNames.front(), EC); diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 647580e4e235d..7c028f18c0308 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -186,6 +186,9 @@ void tools::PS4cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + // Other drivers typically add library search paths (`-L`) here via + // TC.AddFilePathLibArgs(). We don't do that on PS4 as the PS4 linker + // searches those locations by default. Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_s, options::OPT_t}); @@ -290,6 +293,7 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + TC.AddFilePathLibArgs(Args, CmdArgs); Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_s, options::OPT_t}); @@ -338,7 +342,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, } // Allow --sysroot= to override the root directory for header and library - // search, and -sysroot to override header search. If both are specified, + // search, and -isysroot to override header search. If both are specified, // -isysroot overrides --sysroot for header search. auto OverrideRoot = [&](const options::ID &Opt, std::string &Root, StringRef Default) { @@ -382,6 +386,8 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, llvm::sys::path::append(Dir, "target/include"); CheckSDKPartExists(Dir, "system headers"); } + + getFilePaths().push_back("."); } void toolchains::PS4PS5Base::AddClangSystemIncludeArgs( diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index cf39038dcac37..fd3232b7c1b06 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -266,8 +266,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } // Avoid AsanInitInternal cycle, Issue #64126. 
- if (ToolChain.getTriple().isX86() && SA.needsSharedRt() && - SA.needsAsanRt()) { + if (SA.needsSharedRt() && SA.needsAsanRt()) { CmdArgs.push_back("-z"); CmdArgs.push_back("now"); } @@ -334,10 +333,11 @@ Solaris::Solaris(const Driver &D, const llvm::Triple &Triple, } SanitizerMask Solaris::getSupportedSanitizers() const { + const bool IsSparc = getTriple().getArch() == llvm::Triple::sparc; const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; SanitizerMask Res = ToolChain::getSupportedSanitizers(); - // FIXME: Omit X86_64 until 64-bit support is figured out. - if (IsX86) { + // FIXME: Omit SparcV9 and X86_64 until 64-bit support is figured out. + if (IsSparc || IsX86) { Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index 75c2dec22400b..6f42b36bd36a4 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -217,8 +217,8 @@ struct LocationFileChecker { SmallVector, bool>> &KnownFiles) : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { for (const auto &KnownFile : KnownFiles) - if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) - KnownFileEntries.insert(*FileEntry); + if (auto FE = CI.getFileManager().getOptionalFileRef(KnownFile.first)) + KnownFileEntries.insert(*FE); } private: diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 93836ec5402fa..bffff0d27af3a 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2395,7 +2395,7 @@ void ASTUnit::TranslateStoredDiagnostics( // Rebuild the StoredDiagnostic. if (SD.Filename.empty()) continue; - auto FE = FileMgr.getFile(SD.Filename); + auto FE = FileMgr.getOptionalFileRef(SD.Filename); if (!FE) continue; SourceLocation FileLoc; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 5f2a9637e3ea4..240305b33824b 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -427,12 +427,8 @@ static void InitializeFileRemapping(DiagnosticsEngine &Diags, } // Create the file entry for the file that we're mapping from. - const FileEntry *FromFile = - FileMgr.getVirtualFile(RF.first, ToFile->getSize(), 0); - if (!FromFile) { - Diags.Report(diag::err_fe_remap_missing_from_file) << RF.first; - continue; - } + FileEntryRef FromFile = + FileMgr.getVirtualFileRef(RF.first, ToFile->getSize(), 0); // Override the contents of the "from" file with the contents of // the "to" file. @@ -1926,7 +1922,7 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( // Check whether M refers to the file in the prebuilt module path. 
if (M && M->getASTFile()) - if (auto ModuleFile = FileMgr->getFile(ModuleFilename)) + if (auto ModuleFile = FileMgr->getOptionalFileRef(ModuleFilename)) if (*ModuleFile == M->getASTFile()) return M; diff --git a/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/clang/lib/Frontend/Rewrite/FrontendActions.cpp index cf5a9437e89e6..6e1f949f543a5 100644 --- a/clang/lib/Frontend/Rewrite/FrontendActions.cpp +++ b/clang/lib/Frontend/Rewrite/FrontendActions.cpp @@ -213,7 +213,7 @@ class RewriteIncludesAction::RewriteImportsListener : public ASTReaderListener { void visitModuleFile(StringRef Filename, serialization::ModuleKind Kind) override { - auto File = CI.getFileManager().getFile(Filename); + auto File = CI.getFileManager().getOptionalFileRef(Filename); assert(File && "missing file for loaded module?"); // Only rewrite each module file once. diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index a1e792bf772ba..f49ccf7be68e2 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -128,10 +128,8 @@ namespace { SmallVector BlockDeclRefs; // Block related declarations. - SmallVector BlockByCopyDecls; - llvm::SmallPtrSet BlockByCopyDeclsPtrSet; - SmallVector BlockByRefDecls; - llvm::SmallPtrSet BlockByRefDeclsPtrSet; + llvm::SmallSetVector BlockByCopyDecls; + llvm::SmallSetVector BlockByRefDecls; llvm::DenseMap BlockByRefDeclNo; llvm::SmallPtrSet ImportedBlockDecls; llvm::SmallPtrSet ImportedLocalExternalDecls; @@ -3357,7 +3355,7 @@ std::string RewriteObjC::SynthesizeBlockHelperFuncs(BlockExpr *CE, int i, S += VD->getNameAsString(); S += ", (void*)src->"; S += VD->getNameAsString(); - if (BlockByRefDeclsPtrSet.count(VD)) + if (BlockByRefDecls.contains(VD)) S += ", " + utostr(BLOCK_FIELD_IS_BYREF) + "/*BLOCK_FIELD_IS_BYREF*/);"; else if (VD->getType()->isBlockPointerType()) S += ", " + utostr(BLOCK_FIELD_IS_BLOCK) + "/*BLOCK_FIELD_IS_BLOCK*/);"; @@ -3374,7 +3372,7 @@ std::string RewriteObjC::SynthesizeBlockHelperFuncs(BlockExpr *CE, int i, for (ValueDecl *VD : ImportedBlockDecls) { S += "_Block_object_dispose((void*)src->"; S += VD->getNameAsString(); - if (BlockByRefDeclsPtrSet.count(VD)) + if (BlockByRefDecls.contains(VD)) S += ", " + utostr(BLOCK_FIELD_IS_BYREF) + "/*BLOCK_FIELD_IS_BYREF*/);"; else if (VD->getType()->isBlockPointerType()) S += ", " + utostr(BLOCK_FIELD_IS_BLOCK) + "/*BLOCK_FIELD_IS_BLOCK*/);"; @@ -3553,14 +3551,10 @@ void RewriteObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, DeclRefExpr *Exp = InnerDeclRefs[count++]; ValueDecl *VD = Exp->getDecl(); BlockDeclRefs.push_back(Exp); - if (!VD->hasAttr() && !BlockByCopyDeclsPtrSet.count(VD)) { - BlockByCopyDeclsPtrSet.insert(VD); - BlockByCopyDecls.push_back(VD); - } - if (VD->hasAttr() && !BlockByRefDeclsPtrSet.count(VD)) { - BlockByRefDeclsPtrSet.insert(VD); - BlockByRefDecls.push_back(VD); - } + if (VD->hasAttr()) + BlockByRefDecls.insert(VD); + else + BlockByCopyDecls.insert(VD); // imported objects in the inner blocks not used in the outer // blocks must be copied/disposed in the outer block as well. 
if (VD->hasAttr() || @@ -3590,9 +3584,7 @@ void RewriteObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, BlockDeclRefs.clear(); BlockByRefDecls.clear(); - BlockByRefDeclsPtrSet.clear(); BlockByCopyDecls.clear(); - BlockByCopyDeclsPtrSet.clear(); ImportedBlockDecls.clear(); } if (RewriteSC) { @@ -4314,20 +4306,12 @@ void RewriteObjC::CollectBlockDeclRefInfo(BlockExpr *Exp) { if (BlockDeclRefs.size()) { // Unique all "by copy" declarations. for (unsigned i = 0; i < BlockDeclRefs.size(); i++) - if (!BlockDeclRefs[i]->getDecl()->hasAttr()) { - if (!BlockByCopyDeclsPtrSet.count(BlockDeclRefs[i]->getDecl())) { - BlockByCopyDeclsPtrSet.insert(BlockDeclRefs[i]->getDecl()); - BlockByCopyDecls.push_back(BlockDeclRefs[i]->getDecl()); - } - } + if (!BlockDeclRefs[i]->getDecl()->hasAttr()) + BlockByCopyDecls.insert(BlockDeclRefs[i]->getDecl()); // Unique all "by ref" declarations. for (unsigned i = 0; i < BlockDeclRefs.size(); i++) - if (BlockDeclRefs[i]->getDecl()->hasAttr()) { - if (!BlockByRefDeclsPtrSet.count(BlockDeclRefs[i]->getDecl())) { - BlockByRefDeclsPtrSet.insert(BlockDeclRefs[i]->getDecl()); - BlockByRefDecls.push_back(BlockDeclRefs[i]->getDecl()); - } - } + if (BlockDeclRefs[i]->getDecl()->hasAttr()) + BlockByRefDecls.insert(BlockDeclRefs[i]->getDecl()); // Find any imported blocks...they will need special attention. for (unsigned i = 0; i < BlockDeclRefs.size(); i++) if (BlockDeclRefs[i]->getDecl()->hasAttr() || @@ -4358,22 +4342,18 @@ Stmt *RewriteObjC::SynthBlockInitExpr(BlockExpr *Exp, for (unsigned i = 0; i < InnerBlockDeclRefs.size(); i++) { DeclRefExpr *Exp = InnerBlockDeclRefs[i]; ValueDecl *VD = Exp->getDecl(); - if (!VD->hasAttr() && - BlockByCopyDeclsPtrSet.insert(VD).second) { + if (!VD->hasAttr() && BlockByCopyDecls.insert(VD)) { // We need to save the copied-in variables in nested // blocks because it is needed at the end for some of the API // generations. See SynthesizeBlockLiterals routine. InnerDeclRefs.push_back(Exp); countOfInnerDecls++; BlockDeclRefs.push_back(Exp); - BlockByCopyDecls.push_back(VD); } - if (VD->hasAttr() && - BlockByRefDeclsPtrSet.insert(VD).second) { + if (VD->hasAttr() && BlockByRefDecls.insert(VD)) { InnerDeclRefs.push_back(Exp); countOfInnerDecls++; BlockDeclRefs.push_back(Exp); - BlockByRefDecls.push_back(VD); } } // Find any imported blocks...they will need special attention. @@ -4534,9 +4514,7 @@ Stmt *RewriteObjC::SynthBlockInitExpr(BlockExpr *Exp, NewRep); BlockDeclRefs.clear(); BlockByRefDecls.clear(); - BlockByRefDeclsPtrSet.clear(); BlockByCopyDecls.clear(); - BlockByCopyDeclsPtrSet.clear(); ImportedBlockDecls.clear(); return NewRep; } diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index 04d06f46d2652..2ebe72bf021cf 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ b/clang/lib/InstallAPI/Frontend.cpp @@ -107,7 +107,7 @@ InstallAPIContext::findAndRecordFile(const FileEntry *FE, } void InstallAPIContext::addKnownHeader(const HeaderFile &H) { - auto FE = FM->getFile(H.getPath()); + auto FE = FM->getOptionalFileRef(H.getPath()); if (!FE) return; // File does not exist. 
KnownFiles[*FE] = H.getType(); diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 4914c10e62d0c..8826ab449df49 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -227,7 +227,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, ".pcm"); else llvm::sys::path::append(Result, ModuleName + ".pcm"); - if (getFileMgr().getFile(Result.str())) + if (getFileMgr().getOptionalFileRef(Result)) return std::string(Result); } @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { llvm::sys::path::append(CachePath, ModuleCacheHash); std::string FileName = getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); - if (!FileName.empty() && getFileMgr().getFile(FileName)) + if (!FileName.empty() && getFileMgr().getOptionalFileRef(FileName)) return FileName; } return {}; @@ -655,7 +655,7 @@ OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( ++NumFrameworkLookups; // If the framework dir doesn't exist, we fail. - auto Dir = FileMgr.getDirectory(FrameworkName); + auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkName); if (!Dir) return std::nullopt; @@ -718,7 +718,7 @@ OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( bool FoundFramework = false; do { // Determine whether this directory exists. - auto Dir = FileMgr.getDirectory(FrameworkPath); + auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkPath); if (!Dir) break; diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index eed7eca2e7356..2aada51c71c50 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -1144,7 +1144,8 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, if (SubframeworkDirName.empty()) break; - if (auto SubDir = FileMgr.getDirectory(SubframeworkDirName)) { + if (auto SubDir = + FileMgr.getOptionalDirectoryRef(SubframeworkDirName)) { if (*SubDir == FrameworkDir) { FoundParent = true; break; diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 8221db46e06ac..1a71f03b18236 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -229,7 +229,7 @@ static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, StringRef FilePath = File.getDir().getName(); StringRef Path = FilePath; while (!Path.empty()) { - if (auto CurDir = FM.getDirectory(Path)) { + if (auto CurDir = FM.getOptionalDirectoryRef(Path)) { if (*CurDir == Dir) { Result = FilePath.substr(Path.size()); llvm::sys::path::append(Result, diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e9e39c11ffbaa..009b8d000e6b0 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -271,6 +271,49 @@ static bool isInStlNamespace(const Decl *D) { return DC->isStdNamespace(); } +static bool isPointerLikeType(QualType Type) { + return isRecordWithAttr(Type) || Type->isPointerType() || + Type->isNullPtrType(); +} + +// Returns true if the given Record decl is a form of `GSLOwner` +// type, e.g. std::vector, std::optional. +static bool isContainerOfPointer(const RecordDecl *Container) { + if (const auto *CTSD = + dyn_cast_if_present(Container)) { + if (!CTSD->hasAttr()) // Container must be a GSL owner type. 
+      return false;
+    const auto &TAs = CTSD->getTemplateArgs();
+    return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type &&
+           isPointerLikeType(TAs[0].getAsType());
+  }
+  return false;
+}
+static bool isContainerOfOwner(const RecordDecl *Container) {
+  const auto *CTSD =
+      dyn_cast_if_present(Container);
+  if (!CTSD)
+    return false;
+  if (!CTSD->hasAttr()) // Container must be a GSL owner type.
+    return false;
+  const auto &TAs = CTSD->getTemplateArgs();
+  return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type &&
+         isRecordWithAttr(TAs[0].getAsType());
+}
+
+// Returns true if the given Record is `std::initializer_list`.
+static bool isStdInitializerListOfPointer(const RecordDecl *RD) {
+  if (const auto *CTSD =
+          dyn_cast_if_present(RD)) {
+    const auto &TAs = CTSD->getTemplateArgs();
+    return isInStlNamespace(RD) && RD->getIdentifier() &&
+           RD->getName() == "initializer_list" && TAs.size() > 0 &&
+           TAs[0].getKind() == TemplateArgument::Type &&
+           isPointerLikeType(TAs[0].getAsType());
+  }
+  return false;
+}
+
 static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
   if (auto *Conv = dyn_cast_or_null(Callee))
     if (isRecordWithAttr(Conv->getConversionType()) &&
@@ -282,8 +325,7 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
           Callee->getFunctionObjectParameterType()) &&
       !isRecordWithAttr(Callee->getFunctionObjectParameterType()))
     return false;
-  if (Callee->getReturnType()->isPointerType() ||
-      isRecordWithAttr(Callee->getReturnType())) {
+  if (isPointerLikeType(Callee->getReturnType())) {
     if (!Callee->getIdentifier())
       return false;
     return llvm::StringSwitch(Callee->getName())
@@ -331,6 +373,103 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
   return false;
 }
 
+// Returns true if the given constructor is a copy-like constructor, such as
+// `Ctor(Owner&&)` or `Ctor(const Owner&)`.
+static bool isCopyLikeConstructor(const CXXConstructorDecl *Ctor) {
+  if (!Ctor || Ctor->param_size() != 1)
+    return false;
+  const auto *ParamRefType =
+      Ctor->getParamDecl(0)->getType()->getAs();
+  if (!ParamRefType)
+    return false;
+
+  // Check if the first parameter type is "Owner".
+  if (const auto *TST =
+          ParamRefType->getPointeeType()->getAs())
+    return TST->getTemplateName()
+        .getAsTemplateDecl()
+        ->getTemplatedDecl()
+        ->hasAttr();
+  return false;
+}
+
+// Returns true if we should perform the GSL analysis on the first argument for
+// the given constructor.
+static bool
+shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) {
+  const auto *LHSRecordDecl = Ctor->getConstructor()->getParent();
+
+  // Case 1, construct a GSL pointer, e.g. std::string_view
+  // Always inspect when LHS is a pointer.
+  if (LHSRecordDecl->hasAttr())
+    return true;
+
+  if (Ctor->getConstructor()->getNumParams() != 1 ||
+      !isContainerOfPointer(LHSRecordDecl))
+    return false;
+
+  // Now, the LHS is an Owner type, e.g., std::vector.
+  //
+  // At a high level, we cannot precisely determine what the nested pointer
+  // owns. However, by analyzing the RHS owner type, we can use heuristics to
+  // infer ownership information. These heuristics are designed to be
+  // conservative, minimizing false positives while still providing meaningful
+  // diagnostics.
+  //
+  // While this inference isn't perfect, it helps catch common use-after-free
+  // patterns.
+  auto RHSArgType = Ctor->getArg(0)->getType();
+  const auto *RHSRD = RHSArgType->getAsRecordDecl();
+  // LHS is constructed from an initializer_list.
+ // + // std::initializer_list is a proxy object that provides access to the backing + // array. We perform analysis on it to determine if there are any dangling + // temporaries in the backing array. + // E.g. std::vector abc = {string()}; + if (isStdInitializerListOfPointer(RHSRD)) + return true; + + // RHS must be an owner. + if (!isRecordWithAttr(RHSArgType)) + return false; + + // Bail out if the RHS is Owner. + // + // We cannot reliably determine what the LHS nested pointer owns -- it could + // be the entire RHS or the nested pointer in RHS. To avoid false positives, + // we skip this case, such as: + // std::stack s(std::deque{}); + // + // TODO: this also has a false negative, it doesn't catch the case like: + // std::optional> os = std::vector{} + if (isContainerOfPointer(RHSRD)) + return false; + + // Assume that the nested Pointer is constructed from the nested Owner. + // E.g. std::optional sv = std::optional(s); + if (isContainerOfOwner(RHSRD)) + return true; + + // Now, the LHS is an Owner and the RHS is an Owner, where X is + // neither an `Owner` nor a `Pointer`. + // + // Use the constructor's signature as a hint. If it is a copy-like constructor + // `Owner1(Owner2&&)`, we assume that the nested pointer is + // constructed from X. In such cases, we do not diagnose, as `X` is not an + // owner, e.g. + // std::optional sv = std::optional(); + if (const auto *PrimaryCtorTemplate = + Ctor->getConstructor()->getPrimaryTemplate(); + PrimaryCtorTemplate && + isCopyLikeConstructor(dyn_cast_if_present( + PrimaryCtorTemplate->getTemplatedDecl()))) { + return false; + } + // Assume that the nested pointer is constructed from the whole RHS. + // E.g. optional s = std::string(); + return true; +} + // Return true if this is an "normal" assignment operator. // We assuments that a normal assingment operator always returns *this, that is, // an lvalue reference that is the same type as the implicit object parameter @@ -473,12 +612,12 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, if (CheckCoroCall || Callee->getParamDecl(I)->hasAttr()) VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]); else if (EnableGSLAnalysis && I == 0) { + // Perform GSL analysis for the first argument if (shouldTrackFirstArgument(Callee)) { VisitGSLPointerArg(Callee, Args[0]); - } else if (auto *CCE = dyn_cast(Call); - CCE && - CCE->getConstructor()->getParent()->hasAttr()) { - VisitGSLPointerArg(CCE->getConstructor(), Args[0]); + } else if (auto *Ctor = dyn_cast(Call); + Ctor && shouldTrackFirstArgumentForConstructor(Ctor)) { + VisitGSLPointerArg(Ctor->getConstructor(), Args[0]); } } } diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index ed5d44aa898f4..f3f62474d0644 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -3850,8 +3850,9 @@ void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc, // exports are only valid in module purview and outside of any // PMF (although a PMF should not even be present in a module // with an import). - assert(FM && FM->isNamedModule() && !FM->isPrivateModule() && - "bad export context"); + assert(FM && + (FM->isNamedModule() || FM->isImplicitGlobalModule()) && + !FM->isPrivateModule() && "bad export context"); // .. are attached to a named module M, do not appear in the // translation unit containing the point of the lookup.. 
if (D->isInAnotherModuleUnit() && diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index ede3070787722..1f7946e61d175 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2044,14 +2044,14 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) { FileManager &FileMgr = Reader.getFileManager(); if (!Key.Imported) { - if (auto File = FileMgr.getFile(Key.Filename)) + if (auto File = FileMgr.getOptionalFileRef(Key.Filename)) return *File; return nullptr; } std::string Resolved = std::string(Key.Filename); Reader.ResolveImportedPath(M, Resolved); - if (auto File = FileMgr.getFile(Resolved)) + if (auto File = FileMgr.getOptionalFileRef(Resolved)) return *File; return nullptr; } @@ -3856,6 +3856,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case FUNCTION_DECL_TO_LAMBDAS_MAP: + for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { + GlobalDeclID ID = ReadDeclID(F, Record, I); + auto &Lambdas = FunctionToLambdasMap[ID]; + unsigned NN = Record[I++]; + Lambdas.reserve(NN); + for (unsigned II = 0; II < NN; II++) + Lambdas.push_back(ReadDeclID(F, Record, I)); + } + break; + case OBJC_CATEGORIES_MAP: if (F.LocalNumObjCCategoriesInMap != 0) return llvm::createStringError( @@ -4206,7 +4217,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, assert(M && M->Name == F.ModuleName && "found module with different name"); // Check the primary module map file. - auto StoredModMap = FileMgr.getFile(F.ModuleMapPath); + auto StoredModMap = FileMgr.getOptionalFileRef(F.ModuleMapPath); if (!StoredModMap || *StoredModMap != ModMap) { assert(ModMap && "found module is missing module map file"); assert((ImportedBy || F.Kind == MK_ImplicitModule) && diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 9272e23c7da3f..7cead2728ca93 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4351,6 +4351,16 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } + + // Load any pending lambdas for the function. + if (auto *FD = dyn_cast(D); FD && FD->isCanonicalDecl()) { + if (auto IT = FunctionToLambdasMap.find(ID); + IT != FunctionToLambdasMap.end()) { + for (auto LID : IT->second) + GetDecl(LID); + FunctionToLambdasMap.erase(IT); + } + } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4ee14b1e26015..f326e3c2e2ff7 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(PENDING_IMPLICIT_INSTANTIATIONS); RECORD(UPDATE_VISIBLE); RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD); + RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP); RECORD(DECL_UPDATE_OFFSETS); RECORD(DECL_UPDATES); RECORD(CUDA_SPECIAL_DECL_REFS); @@ -5707,6 +5708,27 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD, DelayedNamespaceRecord); + if (!FunctionToLambdasMap.empty()) { + // TODO: on disk hash table for function to lambda mapping might be more + // efficent becuase it allows lazy deserialization. 
+ RecordData FunctionToLambdasMapRecord; + for (const auto &Pair : FunctionToLambdasMap) { + FunctionToLambdasMapRecord.push_back( + GetDeclRef(Pair.first).getRawValue()); + FunctionToLambdasMapRecord.push_back(Pair.second.size()); + for (const auto &Lambda : Pair.second) + FunctionToLambdasMapRecord.push_back(Lambda.getRawValue()); + } + + auto Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv)); + Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord, + FunctionToLambdaMapAbbrev); + } + const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); // Create a lexical update block containing all of the declarations in the // translation unit that do not come from other AST files. diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 555f6325da646..50c090b195d61 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1521,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { } else { Record.push_back(0); } + // For lambdas inside canonical FunctionDecl remember the mapping. + if (auto FD = llvm::dyn_cast_or_null(D->getDeclContext()); + FD && FD->isCanonicalDecl()) { + Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D)); + } } else { Record.push_back(CXXRecNotTemplate); } diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 51b6429412960..e74a16b636802 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -42,8 +42,8 @@ using namespace clang; using namespace serialization; ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const { - auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); + auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (Entry) return lookup(*Entry); @@ -64,8 +64,8 @@ ModuleFile *ModuleManager::lookup(const FileEntry *File) const { std::unique_ptr ModuleManager::lookupBuffer(StringRef Name) { - auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); + auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (!Entry) return nullptr; return std::move(InMemoryBuffers[*Entry]); @@ -279,8 +279,8 @@ void ModuleManager::removeModules(ModuleIterator First) { void ModuleManager::addInMemoryBuffer(StringRef FileName, std::unique_ptr Buffer) { - const FileEntry *Entry = - FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0); + FileEntryRef Entry = + FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0); InMemoryBuffers[Entry] = std::move(Buffer); } diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index e6d3399a21942..0be2709f0907d 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -147,30 +147,24 @@ ProgramState::bindDefaultZero(SVal loc, const LocationContext *LCtx) const { typedef ArrayRef RegionList; typedef ArrayRef ValueList; -ProgramStateRef -ProgramState::invalidateRegions(RegionList Regions, - const Expr *E, unsigned Count, - const LocationContext *LCtx, - bool CausedByPointerEscape, - 
InvalidatedSymbols *IS, - const CallEvent *Call, - RegionAndSymbolInvalidationTraits *ITraits) const { +ProgramStateRef ProgramState::invalidateRegions( + RegionList Regions, const Stmt *S, unsigned Count, + const LocationContext *LCtx, bool CausedByPointerEscape, + InvalidatedSymbols *IS, const CallEvent *Call, + RegionAndSymbolInvalidationTraits *ITraits) const { SmallVector Values; for (const MemRegion *Reg : Regions) Values.push_back(loc::MemRegionVal(Reg)); - return invalidateRegions(Values, E, Count, LCtx, CausedByPointerEscape, IS, + return invalidateRegions(Values, S, Count, LCtx, CausedByPointerEscape, IS, Call, ITraits); } -ProgramStateRef -ProgramState::invalidateRegions(ValueList Values, - const Expr *E, unsigned Count, - const LocationContext *LCtx, - bool CausedByPointerEscape, - InvalidatedSymbols *IS, - const CallEvent *Call, - RegionAndSymbolInvalidationTraits *ITraits) const { +ProgramStateRef ProgramState::invalidateRegions( + ValueList Values, const Stmt *S, unsigned Count, + const LocationContext *LCtx, bool CausedByPointerEscape, + InvalidatedSymbols *IS, const CallEvent *Call, + RegionAndSymbolInvalidationTraits *ITraits) const { ProgramStateManager &Mgr = getStateManager(); ExprEngine &Eng = Mgr.getOwningEngine(); @@ -186,7 +180,7 @@ ProgramState::invalidateRegions(ValueList Values, StoreManager::InvalidatedRegions TopLevelInvalidated; StoreManager::InvalidatedRegions Invalidated; const StoreRef &NewStore = Mgr.StoreMgr->invalidateRegions( - getStore(), Values, E, Count, LCtx, Call, *IS, *ITraits, + getStore(), Values, S, Count, LCtx, Call, *IS, *ITraits, &TopLevelInvalidated, &Invalidated); ProgramStateRef NewState = makeWithStore(NewStore); diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index c257a87dff385..674099dd7e1f0 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -405,19 +405,15 @@ class RegionStoreManager : public StoreManager { //===-------------------------------------------------------------------===// // Binding values to regions. 
diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp
index 51b6429412960..e74a16b636802 100644
--- a/clang/lib/Serialization/ModuleManager.cpp
+++ b/clang/lib/Serialization/ModuleManager.cpp
@@ -42,8 +42,8 @@ using namespace clang;
 using namespace serialization;

 ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const {
-  auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
-                               /*CacheFailure=*/false);
+  auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false,
+                                          /*CacheFailure=*/false);
   if (Entry)
     return lookup(*Entry);

@@ -64,8 +64,8 @@ ModuleFile *ModuleManager::lookup(const FileEntry *File) const {

 std::unique_ptr<llvm::MemoryBuffer>
 ModuleManager::lookupBuffer(StringRef Name) {
-  auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
-                               /*CacheFailure=*/false);
+  auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false,
+                                          /*CacheFailure=*/false);
   if (!Entry)
     return nullptr;
   return std::move(InMemoryBuffers[*Entry]);
@@ -279,8 +279,8 @@ void ModuleManager::removeModules(ModuleIterator First) {

 void ModuleManager::addInMemoryBuffer(StringRef FileName,
                                       std::unique_ptr<llvm::MemoryBuffer> Buffer) {
-  const FileEntry *Entry =
-      FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0);
+  FileEntryRef Entry =
+      FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0);
   InMemoryBuffers[Entry] = std::move(Buffer);
 }
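The getFile-to-getOptionalFileRef migration swaps llvm::ErrorOr<const FileEntry *> for an OptionalFileRef, which additionally preserves the path the file was requested under. A minimal usage sketch; the FileManager calls are the real API, while describeFile is an illustrative helper.

#include "clang/Basic/FileManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;

// Looks up a file without opening it. A FileEntryRef remembers the
// as-requested name, unlike a bare const FileEntry *, which only carries
// the unique on-disk identity.
void describeFile(FileManager &FileMgr, llvm::StringRef Path) {
  OptionalFileRef Ref = FileMgr.getOptionalFileRef(Path, /*OpenFile=*/false,
                                                   /*CacheFailure=*/false);
  if (!Ref) {
    llvm::errs() << Path << ": not found\n";
    return;
  }
  llvm::outs() << Ref->getName() << " (" << Ref->getSize() << " bytes)\n";
}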
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index e6d3399a21942..0be2709f0907d 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -147,30 +147,24 @@ ProgramState::bindDefaultZero(SVal loc, const LocationContext *LCtx) const {

 typedef ArrayRef<const MemRegion *> RegionList;
 typedef ArrayRef<SVal> ValueList;

-ProgramStateRef
-ProgramState::invalidateRegions(RegionList Regions,
-                                const Expr *E, unsigned Count,
-                                const LocationContext *LCtx,
-                                bool CausedByPointerEscape,
-                                InvalidatedSymbols *IS,
-                                const CallEvent *Call,
-                                RegionAndSymbolInvalidationTraits *ITraits) const {
+ProgramStateRef ProgramState::invalidateRegions(
+    RegionList Regions, const Stmt *S, unsigned Count,
+    const LocationContext *LCtx, bool CausedByPointerEscape,
+    InvalidatedSymbols *IS, const CallEvent *Call,
+    RegionAndSymbolInvalidationTraits *ITraits) const {
   SmallVector<SVal, 8> Values;
   for (const MemRegion *Reg : Regions)
     Values.push_back(loc::MemRegionVal(Reg));

-  return invalidateRegions(Values, E, Count, LCtx, CausedByPointerEscape, IS,
+  return invalidateRegions(Values, S, Count, LCtx, CausedByPointerEscape, IS,
                            Call, ITraits);
 }

-ProgramStateRef
-ProgramState::invalidateRegions(ValueList Values,
-                                const Expr *E, unsigned Count,
-                                const LocationContext *LCtx,
-                                bool CausedByPointerEscape,
-                                InvalidatedSymbols *IS,
-                                const CallEvent *Call,
-                                RegionAndSymbolInvalidationTraits *ITraits) const {
+ProgramStateRef ProgramState::invalidateRegions(
+    ValueList Values, const Stmt *S, unsigned Count,
+    const LocationContext *LCtx, bool CausedByPointerEscape,
+    InvalidatedSymbols *IS, const CallEvent *Call,
+    RegionAndSymbolInvalidationTraits *ITraits) const {
   ProgramStateManager &Mgr = getStateManager();
   ExprEngine &Eng = Mgr.getOwningEngine();

@@ -186,7 +180,7 @@ ProgramState::invalidateRegions(ValueList Values,
   StoreManager::InvalidatedRegions TopLevelInvalidated;
   StoreManager::InvalidatedRegions Invalidated;
   const StoreRef &NewStore = Mgr.StoreMgr->invalidateRegions(
-      getStore(), Values, E, Count, LCtx, Call, *IS, *ITraits,
+      getStore(), Values, S, Count, LCtx, Call, *IS, *ITraits,
       &TopLevelInvalidated, &Invalidated);

   ProgramStateRef NewState = makeWithStore(NewStore);
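Widening these signatures from const Expr * to const Stmt * lets invalidation be keyed on non-expression statements, while every existing Expr-based caller keeps compiling, since Expr derives from Stmt. An illustrative sketch of the relaxed contract; invalidateFor and caller are made-up names, only the AST types are real.

#include "clang/AST/Expr.h"

using namespace clang;

// Illustrative only: an API that previously demanded an Expr now accepts
// any Stmt; Expr arguments still bind because Expr is-a Stmt.
static void invalidateFor(const Stmt *S) { (void)S; }

static void caller(const Expr *E, const CompoundStmt *Body) {
  invalidateFor(E);    // worked before and after the change
  invalidateFor(Body); // possible only with the widened signature
}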
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index c257a87dff385..674099dd7e1f0 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -405,19 +405,15 @@ class RegionStoreManager : public StoreManager {
   //===-------------------------------------------------------------------===//
   // Binding values to regions.
   //===-------------------------------------------------------------------===//
-  RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K,
-                                           const Expr *Ex,
+  RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K, const Stmt *S,
                                            unsigned Count,
                                            const LocationContext *LCtx,
                                            RegionBindingsRef B,
                                            InvalidatedRegions *Invalidated);

-  StoreRef invalidateRegions(Store store,
-                             ArrayRef<SVal> Values,
-                             const Expr *E, unsigned Count,
-                             const LocationContext *LCtx,
-                             const CallEvent *Call,
-                             InvalidatedSymbols &IS,
+  StoreRef invalidateRegions(Store store, ArrayRef<SVal> Values, const Stmt *S,
+                             unsigned Count, const LocationContext *LCtx,
+                             const CallEvent *Call, InvalidatedSymbols &IS,
                              RegionAndSymbolInvalidationTraits &ITraits,
                              InvalidatedRegions *Invalidated,
                              InvalidatedRegions *InvalidatedTopLevel) override;
@@ -975,7 +971,7 @@ RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B,
 namespace {
 class InvalidateRegionsWorker : public ClusterAnalysis<InvalidateRegionsWorker>
 {
-  const Expr *Ex;
+  const Stmt *S;
   unsigned Count;
   const LocationContext *LCtx;
   InvalidatedSymbols &IS;
@@ -983,18 +979,15 @@ class InvalidateRegionsWorker : public ClusterAnalysis<InvalidateRegionsWorker>
   StoreManager::InvalidatedRegions *Regions;
   GlobalsFilterKind GlobalsFilter;
 public:
-  InvalidateRegionsWorker(RegionStoreManager &rm,
-                          ProgramStateManager &stateMgr,
-                          RegionBindingsRef b,
-                          const Expr *ex, unsigned count,
-                          const LocationContext *lctx,
-                          InvalidatedSymbols &is,
+  InvalidateRegionsWorker(RegionStoreManager &rm, ProgramStateManager &stateMgr,
+                          RegionBindingsRef b, const Stmt *S, unsigned count,
+                          const LocationContext *lctx, InvalidatedSymbols &is,
                           RegionAndSymbolInvalidationTraits &ITraitsIn,
                           StoreManager::InvalidatedRegions *r,
                           GlobalsFilterKind GFK)
-      : ClusterAnalysis<InvalidateRegionsWorker>(rm, stateMgr, b),
-        Ex(ex), Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r),
-        GlobalsFilter(GFK) {}
+      : ClusterAnalysis<InvalidateRegionsWorker>(rm, stateMgr, b), S(S),
+        Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r),
+        GlobalsFilter(GFK) {}

   void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
   void VisitBinding(SVal V);
@@ -1127,7 +1120,7 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
       // Invalidate the region by setting its default value to
       // conjured symbol. The type of the symbol is irrelevant.
       DefinedOrUnknownSVal V =
-          svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count);
+          svalBuilder.conjureSymbolVal(baseR, S, LCtx, Ctx.IntTy, Count);
       B = B.addBinding(baseR, BindingKey::Default, V);
       return;
     }
@@ -1148,8 +1141,8 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
     if (T->isRecordType()) {
       // Invalidate the region by setting its default value to
       // conjured symbol. The type of the symbol is irrelevant.
-      DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
-                                                            Ctx.IntTy, Count);
+      DefinedOrUnknownSVal V =
+          svalBuilder.conjureSymbolVal(baseR, S, LCtx, Ctx.IntTy, Count);
       B = B.addBinding(baseR, BindingKey::Default, V);
       return;
     }
@@ -1216,15 +1209,14 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
       }
     conjure_default:
       // Set the default value of the array to conjured symbol.
-      DefinedOrUnknownSVal V =
-          svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
-                                       AT->getElementType(), Count);
-      B = B.addBinding(baseR, BindingKey::Default, V);
-      return;
+      DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(
+          baseR, S, LCtx, AT->getElementType(), Count);
+      B = B.addBinding(baseR, BindingKey::Default, V);
+      return;
   }

-  DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
-                                                        T,Count);
+  DefinedOrUnknownSVal V =
+      svalBuilder.conjureSymbolVal(baseR, S, LCtx, T, Count);
   assert(SymbolManager::canSymbolicate(T) || V.isUnknown());
   B = B.addBinding(baseR, BindingKey::Direct, V);
 }
@@ -1252,19 +1244,16 @@ bool InvalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) {
              RegionAndSymbolInvalidationTraits::TK_EntireMemSpace);
 }

-RegionBindingsRef
-RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K,
-                                           const Expr *Ex,
-                                           unsigned Count,
-                                           const LocationContext *LCtx,
-                                           RegionBindingsRef B,
-                                           InvalidatedRegions *Invalidated) {
+RegionBindingsRef RegionStoreManager::invalidateGlobalRegion(
+    MemRegion::Kind K, const Stmt *S, unsigned Count,
+    const LocationContext *LCtx, RegionBindingsRef B,
+    InvalidatedRegions *Invalidated) {
   // Bind the globals memory space to a new symbol that we will use to derive
   // the bindings for all globals.
   const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K);
-  SVal V = svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void*) GS, Ex, LCtx,
-                                        /* type does not matter */ Ctx.IntTy,
-                                        Count);
+  SVal V =
+      svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void *)GS, S, LCtx,
+                                   /* type does not matter */ Ctx.IntTy, Count);

   B = B.removeBinding(GS)
        .addBinding(BindingKey::Make(GS, BindingKey::Default), V);
@@ -1298,16 +1287,11 @@ void RegionStoreManager::populateWorkList(InvalidateRegionsWorker &W,
   }
 }

-StoreRef
-RegionStoreManager::invalidateRegions(Store store,
-                                      ArrayRef<SVal> Values,
-                                      const Expr *Ex, unsigned Count,
-                                      const LocationContext *LCtx,
-                                      const CallEvent *Call,
-                                      InvalidatedSymbols &IS,
-                                      RegionAndSymbolInvalidationTraits &ITraits,
-                                      InvalidatedRegions *TopLevelRegions,
-                                      InvalidatedRegions *Invalidated) {
+StoreRef RegionStoreManager::invalidateRegions(
+    Store store, ArrayRef<SVal> Values, const Stmt *S, unsigned Count,
+    const LocationContext *LCtx, const CallEvent *Call, InvalidatedSymbols &IS,
+    RegionAndSymbolInvalidationTraits &ITraits,
+    InvalidatedRegions *TopLevelRegions, InvalidatedRegions *Invalidated) {
   GlobalsFilterKind GlobalsFilter;
   if (Call) {
     if (Call->isInSystemHeader())
@@ -1319,7 +1303,7 @@ RegionStoreManager::invalidateRegions(Store store,
   }

   RegionBindingsRef B = getRegionBindings(store);
-  InvalidateRegionsWorker W(*this, StateMgr, B, Ex, Count, LCtx, IS, ITraits,
+  InvalidateRegionsWorker W(*this, StateMgr, B, S, Count, LCtx, IS, ITraits,
                             Invalidated, GlobalsFilter);

   // Scan the bindings and generate the clusters.
@@ -1339,12 +1323,12 @@ RegionStoreManager::invalidateRegions(Store store,
   // TODO: This could possibly be more precise with modules.
   switch (GlobalsFilter) {
   case GFK_All:
-    B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind,
-                               Ex, Count, LCtx, B, Invalidated);
+    B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind, S,
+                               Count, LCtx, B, Invalidated);
     [[fallthrough]];
   case GFK_SystemOnly:
-    B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind,
-                               Ex, Count, LCtx, B, Invalidated);
+    B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind, S, Count,
+                               LCtx, B, Invalidated);
     [[fallthrough]];
   case GFK_None:
     break;
diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index 7eca0579143f4..cb5fcbade2cfc 100644
--- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -174,7 +174,7 @@ DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const void *SymbolTag,
 }

 DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const void *symbolTag,
-                                                   const Expr *expr,
+                                                   const Stmt *St,
                                                    const LocationContext *LCtx,
                                                    QualType type,
                                                    unsigned count) {
@@ -184,7 +184,7 @@ DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const void *symbolTag,
   if (!SymbolManager::canSymbolicate(type))
     return UnknownVal();

-  SymbolRef sym = SymMgr.conjureSymbol(expr, LCtx, type, count, symbolTag);
+  SymbolRef sym = SymMgr.conjureSymbol(St, LCtx, type, count, symbolTag);

   if (Loc::isLocType(type))
     return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym));
diff --git a/clang/lib/Tooling/Core/Replacement.cpp b/clang/lib/Tooling/Core/Replacement.cpp
index 89a5b15244274..92e9859ca206e 100644
--- a/clang/lib/Tooling/Core/Replacement.cpp
+++ b/clang/lib/Tooling/Core/Replacement.cpp
@@ -614,7 +614,7 @@ std::map<std::string, Replacements> groupReplacementsByFile(
   std::map<std::string, Replacements> Result;
   llvm::SmallPtrSet<const FileEntry *, 16> ProcessedFileEntries;
   for (const auto &Entry : FileToReplaces) {
-    auto FE = FileMgr.getFile(Entry.first);
+    auto FE = FileMgr.getOptionalFileRef(Entry.first);
     if (!FE)
       llvm::errs() << "File path " << Entry.first << " is invalid.\n";
     else if (ProcessedFileEntries.insert(*FE).second)
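groupReplacementsByFile deduplicates by underlying file identity, so distinct spellings of the same path collapse into one group. A small usage sketch; groupReplacementsByFile is the real clang::tooling API, while groupForApply and the call site are illustrative.

#include "clang/Basic/FileManager.h"
#include "clang/Tooling/Core/Replacement.h"
#include <map>
#include <string>

using namespace clang;
using namespace clang::tooling;

// Groups per-path replacements by the file they resolve to, so "./a.cc"
// and "a.cc" end up under a single entry when they name the same file.
std::map<std::string, Replacements>
groupForApply(FileManager &FileMgr,
              const std::map<std::string, Replacements> &FileToReplaces) {
  return groupReplacementsByFile(FileMgr, FileToReplaces);
}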
This
@@ -299,7 +300,8 @@ llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
     assert(MD && "Inconsistent dependency info");
     // TODO: Track ClangModuleMapFile as `FileEntryRef`.
-    auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile);
+    auto FE = ScanInstance.getFileManager().getOptionalFileRef(
+        MD->ClangModuleMapFile);
     assert(FE && "Missing module map file that was previously found");
     ModuleMapFiles.insert(*FE);
   }
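The TODOs above concern the equality check: a const FileEntry * compares by unique on-disk identity, while a FileEntryRef also carries the name it was found under, so two refs can name the same file through different paths. An illustrative sketch of the distinction; sameUnderlyingFile is a made-up helper, FileEntryRef is the real type.

#include "clang/Basic/FileEntry.h"

using namespace clang;

// FileEntryRef::isSameRef compares the named reference itself; asking
// whether two refs denote the same file on disk means comparing the
// underlying FileEntry objects instead.
bool sameUnderlyingFile(FileEntryRef A, FileEntryRef B) {
  return &A.getFileEntry() == &B.getFileEntry();
}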
// REQUIRES: system-linux
+// UNSUPPORTED: target={{.*}}-zos{{.*}}

 #include "ctu-hdr.h"
diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp
index 7a562adae02a6..1ff6ff3ac1922 100644
--- a/clang/test/AST/ByteCode/placement-new.cpp
+++ b/clang/test/AST/ByteCode/placement-new.cpp
@@ -13,7 +13,8 @@ namespace std {
   };
   template <typename T, typename... Args>
   constexpr void construct_at(void *p, Args &&...args) {
-    new (p) T((Args&&)args...); // both-note {{in call to}}
+    new (p) T((Args&&)args...); // both-note {{in call to}} \
+                                // both-note {{placement new would change type of storage from 'int' to 'float'}}
   }
 }
@@ -260,4 +261,13 @@ namespace ConstructAt {

   static_assert(ctorFail()); // both-error {{not an integral constant expression}} \
                              // both-note {{in call to 'ctorFail()'}}
+
+  constexpr bool bad_construct_at_type() {
+    int a;
+    std::construct_at<float>(&a, 1.0f); // both-note {{in call to}}
+    return true;
+  }
+  static_assert(bad_construct_at_type()); // both-error {{not an integral constant expression}} \
+                                          // both-note {{in call}}
+
 }
diff --git a/clang/test/Analysis/ctu-on-demand-parsing.c b/clang/test/Analysis/ctu-on-demand-parsing.c
index 72288def61b13..17ade150ded5e 100644
--- a/clang/test/Analysis/ctu-on-demand-parsing.c
+++ b/clang/test/Analysis/ctu-on-demand-parsing.c
@@ -24,6 +24,7 @@
 //
 // FIXME: Path handling should work on all platforms.
 // REQUIRES: system-linux
+// UNSUPPORTED: target={{.*}}-zos{{.*}}

 void clang_analyzer_eval(int);

diff --git a/clang/test/Analysis/ctu-on-demand-parsing.cpp b/clang/test/Analysis/ctu-on-demand-parsing.cpp
index d28d3c22c69b0..0c0128faefaea 100644
--- a/clang/test/Analysis/ctu-on-demand-parsing.cpp
+++ b/clang/test/Analysis/ctu-on-demand-parsing.cpp
@@ -35,6 +35,7 @@
 //
 // FIXME: Path handling should work on all platforms.
 // REQUIRES: system-linux
+// UNSUPPORTED: target={{.*}}-zos{{.*}}

 #include "ctu-hdr.h"
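The new placement-new test exercises the constant evaluator's storage-type check: constructing a float into storage declared as int is rejected at compile time. A standalone C++20 reproducer of the accepted case, using the real std::construct_at from <memory> rather than the test's local stub (the function name here is illustrative):

#include <memory>

constexpr bool construct_at_in_constexpr() {
  int a;
  // OK in a constant expression: the constructed type matches the storage.
  std::construct_at(&a, 1);
  // Constructing a different type (e.g. float) into int storage, as the
  // test above does via std::construct_at<float>, would make this not a
  // constant expression.
  return a == 1;
}
static_assert(construct_at_in_constexpr());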
// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c
index 06ccd3125c083..88b00653c56eb 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfh -disable-O0-optnone \
+// RUN:   -target-feature +zvfhmin -disable-O0-optnone \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c
index 5950068181abf..f18b999e89232 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfh -disable-O0-optnone \
+// RUN:   -target-feature +zvfhmin -disable-O0-optnone \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c
index e156ec91bfd2c..afc9cff8dec04 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfh -disable-O0-optnone \
+// RUN:   -target-feature +zvfhmin -disable-O0-optnone \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c
index 92b894f1f5ef5..8a01f5ebdbcfe 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfh -disable-O0-optnone \
+// RUN:   -target-feature +zvfhmin -disable-O0-optnone \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c
index fa923d87bd1ba..18f6901073a1e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfh -disable-O0-optnone \
+// RUN:   -target-feature +zvfhmin -disable-O0-optnone \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c
index cc5a32878bd90..b63fa52fa3039 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
-// RUN:   -target-feature +zvfh -disable-O0-optnone \
+// RUN:   -target-feature +zvfhmin -disable-O0-optnone \
 // RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c
new file mode 100644
index 0000000000000..6bcf9bc946b20
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c
@@ -0,0 +1,416 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_sme.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + + +// Single x2 +// CHECK-LABEL: @test_svscale_single_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x213svfloat16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] +// +svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f16_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x213svfloat32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] +// +svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f32_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x213svfloat64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue 
{ , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] +// +svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f64_x2)(op1, op2); +} + +// Single x4 +// CHECK-LABEL: @test_svscale_single_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x413svfloat16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] +// +svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f16_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x413svfloat32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } 
[[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] +// +svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f32_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x413svfloat64x4_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] +// +svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f64_x4)(op1, op2); +} + +// Multi x2 +// CHECK-LABEL: @test_svscale_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] +// 
+// CPP-CHECK-LABEL: @_Z19test_svscale_f16_x213svfloat16x2_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] +// +svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f16_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f32_x213svfloat32x2_t11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] +// +svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f32_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } 
@llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f64_x213svfloat64x2_t11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] +// +svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f64_x2)(op1, op2); +} + +// Multi x4 +// CHECK-LABEL: @test_svscale_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f16_x413svfloat16x4_t11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// 
CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] +// +svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f16_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f32_x413svfloat32x4_t11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// 
CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] +// +svfloat32x4_t test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f32_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f64_x413svfloat64x4_t11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] +// +svfloat64x4_t test_svscale_f64_x4(svfloat64x4_t op1, svint64x4_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f64_x4)(op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c index 9c639984305d1..1297185c4b50e 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c @@ -24,27 +24,13 @@ // CHECK-LABEL: @test_svclamp_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x2, , )(op1, op2, op3); @@ -52,27 +38,13 @@ svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x211svint16x2_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// 
CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s16_x2, , )(op1, op2, op3);
@@ -80,27 +52,13 @@ svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t
 
 // CHECK-LABEL: @test_svclamp_single_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x211svint32x2_tu11__SVInt32_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s32_x2, , )(op1, op2, op3);
@@ -108,27 +66,13 @@ svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t
 
 // CHECK-LABEL: @test_svclamp_single_s64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x211svint64x2_tu11__SVInt64_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s64_x2, , )(op1, op2, op3);
@@ -139,35 +83,13 @@ svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t
 
 // CHECK-LABEL: @test_svclamp_single_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x410svint8x4_tu10__SVInt8_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s8_x4, , )(op1, op2, op3);
@@ -175,35 +97,13 @@ svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3)
 
 // CHECK-LABEL: @test_svclamp_single_s16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP1_COERCE2:%.*]], <vscale x 8 x i16> [[OP1_COERCE3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x411svint16x4_tu11__SVInt16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP1_COERCE2:%.*]], <vscale x 8 x i16> [[OP1_COERCE3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s16_x4, , )(op1, op2, op3);
@@ -211,35 +111,13 @@ svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t
 
 // CHECK-LABEL: @test_svclamp_single_s32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP1_COERCE2:%.*]], <vscale x 4 x i32> [[OP1_COERCE3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x411svint32x4_tu11__SVInt32_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP1_COERCE2:%.*]], <vscale x 4 x i32> [[OP1_COERCE3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s32_x4, , )(op1, op2, op3);
@@ -247,35 +125,13 @@ svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t
 
 // CHECK-LABEL: @test_svclamp_single_s64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP1_COERCE2:%.*]], <vscale x 2 x i64> [[OP1_COERCE3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x411svint64x4_tu11__SVInt64_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP1_COERCE2:%.*]], <vscale x 2 x i64> [[OP1_COERCE3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s64_x4, , )(op1, op2, op3);
@@ -288,27 +144,13 @@ svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t
 
 // CHECK-LABEL: @test_svclamp_single_u8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x211svuint8x2_tu11__SVUint8_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u8_x2, , )(op1, op2, op3);
@@ -316,27 +158,13 @@ svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t
 
 // CHECK-LABEL: @test_svclamp_single_u16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x212svuint16x2_tu12__SVUint16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u16_x2, , )(op1, op2, op3);
@@ -344,27 +172,13 @@ svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint
 
 // CHECK-LABEL: @test_svclamp_single_u32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x212svuint32x2_tu12__SVUint32_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u32_x2, , )(op1, op2, op3);
@@ -372,27 +186,13 @@ svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint
 
 // CHECK-LABEL: @test_svclamp_single_u64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x212svuint64x2_tu12__SVUint64_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u64_x2, , )(op1, op2, op3);
@@ -403,35 +203,13 @@ svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint
 
 // CHECK-LABEL: @test_svclamp_single_u8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x411svuint8x4_tu11__SVUint8_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u8_x4, , )(op1, op2, op3);
@@ -439,35 +217,13 @@ svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t
 
 // CHECK-LABEL: @test_svclamp_single_u16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP1_COERCE2:%.*]], <vscale x 8 x i16> [[OP1_COERCE3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x412svuint16x4_tu12__SVUint16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x 8 x i16> [[OP1_COERCE0:%.*]], <vscale x 8 x i16> [[OP1_COERCE1:%.*]], <vscale x 8 x i16> [[OP1_COERCE2:%.*]], <vscale x 8 x i16> [[OP1_COERCE3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u16_x4, , )(op1, op2, op3);
@@ -475,35 +231,13 @@ svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint
 
 // CHECK-LABEL: @test_svclamp_single_u32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP1_COERCE2:%.*]], <vscale x 4 x i32> [[OP1_COERCE3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x412svuint32x4_tu12__SVUint32_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE0:%.*]], <vscale x 4 x i32> [[OP1_COERCE1:%.*]], <vscale x 4 x i32> [[OP1_COERCE2:%.*]], <vscale x 4 x i32> [[OP1_COERCE3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u32_x4, , )(op1, op2, op3);
@@ -511,35 +245,13 @@ svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint
 
 // CHECK-LABEL: @test_svclamp_single_u64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP1_COERCE2:%.*]], <vscale x 2 x i64> [[OP1_COERCE3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x412svuint64x4_tu12__SVUint64_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP1_COERCE2:%.*]], <vscale x 2 x i64> [[OP1_COERCE3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_u64_x4, , )(op1, op2, op3);
@@ -552,27 +264,13 @@ svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint
 
 // CHECK-LABEL: @test_svclamp_single_f16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> [[OP1_COERCE0:%.*]], <vscale x 8 x half> [[OP1_COERCE1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x213svfloat16x2_tu13__SVFloat16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> [[OP1_COERCE0:%.*]], <vscale x 8 x half> [[OP1_COERCE1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_f16_x2, , )(op1, op2, op3);
@@ -580,27 +278,13 @@ svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svf
 
 // CHECK-LABEL: @test_svclamp_single_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> [[OP1_COERCE0:%.*]], <vscale x 4 x float> [[OP1_COERCE1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x213svfloat32x2_tu13__SVFloat32_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> [[OP1_COERCE0:%.*]], <vscale x 4 x float> [[OP1_COERCE1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_f32_x2, , )(op1, op2, op3);
@@ -609,27 +293,13 @@ svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svf
 
 // CHECK-LABEL: @test_svclamp_single_f64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> [[OP1_COERCE0:%.*]], <vscale x 2 x double> [[OP1_COERCE1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x213svfloat64x2_tu13__SVFloat64_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> [[OP1_COERCE0:%.*]], <vscale x 2 x double> [[OP1_COERCE1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_f64_x2, , )(op1, op2, op3);
@@ -640,35 +310,13 @@ svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svf
 
 // CHECK-LABEL: @test_svclamp_single_f16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> [[OP1_COERCE0:%.*]], <vscale x 8 x half> [[OP1_COERCE1:%.*]], <vscale x 8 x half> [[OP1_COERCE2:%.*]], <vscale x 8 x half> [[OP1_COERCE3:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x413svfloat16x4_tu13__SVFloat16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> [[OP1_COERCE0:%.*]], <vscale x 8 x half> [[OP1_COERCE1:%.*]], <vscale x 8 x half> [[OP1_COERCE2:%.*]], <vscale x 8 x half> [[OP1_COERCE3:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_f16_x4, , )(op1, op2, op3);
@@ -676,35 +324,13 @@ svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svf
 
 // CHECK-LABEL: @test_svclamp_single_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> [[OP1_COERCE0:%.*]], <vscale x 4 x float> [[OP1_COERCE1:%.*]], <vscale x 4 x float> [[OP1_COERCE2:%.*]], <vscale x 4 x float> [[OP1_COERCE3:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x413svfloat32x4_tu13__SVFloat32_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> [[OP1_COERCE0:%.*]], <vscale x 4 x float> [[OP1_COERCE1:%.*]], <vscale x 4 x float> [[OP1_COERCE2:%.*]], <vscale x 4 x float> [[OP1_COERCE3:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_f32_x4, , )(op1, op2, op3);
@@ -712,35 +338,13 @@ svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svf
 
 // CHECK-LABEL: @test_svclamp_single_f64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> [[OP1_COERCE0:%.*]], <vscale x 2 x double> [[OP1_COERCE1:%.*]], <vscale x 2 x double> [[OP1_COERCE2:%.*]], <vscale x 2 x double> [[OP1_COERCE3:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x413svfloat64x4_tu13__SVFloat64_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> [[OP1_COERCE0:%.*]], <vscale x 2 x double> [[OP1_COERCE1:%.*]], <vscale x 2 x double> [[OP1_COERCE2:%.*]], <vscale x 2 x double> [[OP1_COERCE3:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_f64_x4, , )(op1, op2, op3);
@@ -748,27 +352,13 @@ svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svf
 
 // CHECK-LABEL: @test_svclamp_single_bf16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_bf16_x2, , )(op1, op2, op3);
@@ -776,35 +366,13 @@ svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2,
 
 // CHECK-LABEL: @test_svclamp_single_bf16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE2:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE3:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE2:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE3:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 svbfloat16x4_t test_svclamp_single_bf16_x4(svbfloat16x4_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_bf16_x4, , )(op1, op2, op3);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
index 2d61670fd6049..2851ea9ccd22c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
@@ -49,27 +49,13 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming {
 
 // x2
 // CHECK-LABEL: @test_svcvt_f32_u32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x212svuint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_f32,_u32_x2,,)(zn);
@@ -77,27 +63,13 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svcvt_f32_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x211svint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_f32,_s32_x2,,)(zn);
@@ -105,27 +77,13 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svcvt_u32_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x213svfloat32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_u32,_f32_x2,,)(zn);
@@ -133,27 +91,13 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svcvt_s32_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x213svfloat32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_s32,_f32_x2,,)(zn);
@@ -162,35 +106,13 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming {
 
 // x4
 // CHECK-LABEL: @test_svcvt_f32_u32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x412svuint32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_f32,_u32_x4,,)(zn);
@@ -198,35 +120,13 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svcvt_f32_s32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x411svint32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_f32,_s32_x4,,)(zn);
@@ -234,35 +134,13 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svcvt_u32_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x413svfloat32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_u32,_f32_x4,,)(zn);
@@ -270,35 +148,13 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svcvt_s32_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x413svfloat32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x4_t test_svcvt_s32_f32_x4(svfloat32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_s32,_f32_x4,,)(zn);
@@ -432,27 +288,13 @@ svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_cvt_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call
@llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 __attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
index fc5c0376e925e..5189ab4af8327 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
@@ -19,27 +19,13 @@
 // CHECK-LABEL: @test_cvtl_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c
index a1540bba2a8a9..d4d423f982e84 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c
@@ -19,27 +19,13 @@
 // CHECK-LABEL: @test_svamax_f16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x213svfloat16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svamax,_f16_x2)(zdn, zm);
@@ -47,27 +33,13 @@ svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svamax_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x213svfloat32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svamax,_f32_x2)(zdn, zm);
@@ -75,27 +47,13 @@ svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svamax_f64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] =
extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x2)(zdn, zm); @@ -131,27 +75,13 @@ svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x2)(zdn, zm); @@ -159,27 +89,13 @@ svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return 
SVE_ACLE_FUNC(svamin,_f64_x2)(zdn, zm); @@ -189,35 +105,13 @@ svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x4)(zdn, zm); @@ -225,35 +119,13 @@ svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x4)(zdn, zm); @@ -261,35 +133,13 @@ svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } 
[[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x4)(zdn, zm); @@ -297,35 +147,13 @@ svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x4)(zdn, zm); @@ -333,35 +161,13 @@ svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t 
test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svamin,_f32_x4)(zdn, zm);
@@ -369,35 +175,13 @@ svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre
 // CHECK-LABEL: @test_svamin_f64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZDN_COERCE2:%.*]], <vscale x 2 x double> [[ZDN_COERCE3:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE2:%.*]], <vscale x 2 x double> [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x413svfloat64x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZDN_COERCE2:%.*]], <vscale x 2 x double> [[ZDN_COERCE3:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE2:%.*]], <vscale x 2 x double> [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x4_t test_svamin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svamin,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c
index abdb5a46d5453..8ab450587fc70 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c
@@ -21,27 +21,13 @@
 // CHECK-LABEL: @test_svfrinta_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x2)(zn); @@ -49,35 +35,13 @@ svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrinta_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x4)(zn); @@ -87,27 +51,13 @@ svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintam_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svfrintam_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x2)(zn); @@ -115,35 +65,13 @@ svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintm_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] 
= load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintm_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x4)(zn); @@ -153,27 +81,13 @@ svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x2)(zn); @@ -181,35 +95,13 @@ svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x4)(zn); @@ -219,27 +111,13 @@ svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], 
<vscale x 4 x float> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svrintp,_f32_x2)(zn);
@@ -247,35 +125,13 @@ svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svfrintp_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x413svfloat32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x4_t test_svfrintp_f32_x4(svfloat32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svrintp,_f32_x4)(zn);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
index 6dd55663d7d34..3b17c6d9edb19 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
@@ -10,27 +10,13 @@
 // CHECK-LABEL: @test_svluti2_lane_zt_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti2_lane_zt_u8_x2(0, zn, 7);
@@ -39,27 +25,13 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0"
 // CHECK-LABEL: @test_svluti2_lane_zt_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti2_lane_zt_s8_x2(0, zn, 7);
@@ -67,27 +39,13 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0")
 // CHECK-LABEL: @test_svluti2_lane_zt_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti2_lane_zt_u16_x2(0, zn, 7);
@@ -96,27 +54,13 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
 // CHECK-LABEL: @test_svluti2_lane_zt_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x2(0, zn, 7); @@ -124,27 +68,13 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x2(0, zn, 7); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x2(0, zn, 7); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x2(0, zn, 7); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x2(0, zn, 7); @@ -236,27 +124,13 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x2(0, zn, 7); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index 8650ec7f62dd8..38059019737f8 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -10,35 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x4(0, zn, 3); @@ -47,35 +25,13 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x4(0, zn, 3); @@ -83,35 +39,13 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x4(0, zn, 3); @@ -119,35 +53,13 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x4(0, zn, 3); @@ -155,35 +67,13 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } 
[[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x4(0, zn, 3); @@ -191,35 +81,13 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x4(0, zn, 3); @@ -227,35 +95,13 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x4(0, zn, 3); @@ -263,35 +109,13 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x4(0, zn, 3); @@ -299,35 +123,13 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x4(0, zn, 3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index f4f11c9fc5b14..db615b3cd1c24 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti4_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u8_x2(0, zn, 3); @@ -39,27 +25,13 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti4_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 
0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s8_x2(0, zn, 3); @@ -67,27 +39,13 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti4_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x2(0, zn, 3); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x2(0, zn, 3); @@ -124,27 +68,13 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti4_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x2(0, zn, 3); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // 
CHECK-LABEL: @test_svluti4_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x2(0, zn, 3); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti4_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return 
svluti4_lane_zt_u32_x2(0, zn, 3); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x2(0, zn, 3); @@ -236,27 +124,13 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti4_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } 
[[TMP0]]
 //
 svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_f32_x2(0, zn, 3);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
index 16a7421326235..c4c89358c16f8 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
@@ -11,36 +11,14 @@
 // CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti4_lane_zt_u16
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_u16_x4(0, zn, 1);
@@ -49,36 +27,14 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
 // CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_svluti4_lane_zt_f16
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_f16_x4(0, zn, 1);
@@ -87,36 +43,14 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
 // CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_svluti4_lane_zt_bf16
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_bf16_x4(0, zn, 1);
@@ -125,36 +59,14 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
 // CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti4_lane_zt_s16
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_s16_x4(0, zn, 1);
@@ -163,36 +75,14 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
 // CHECK-LABEL: define dso_local { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_svluti4_lane_zt_u32
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_u32_x4(0, zn, 1);
@@ -201,36 +91,14 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
 // CHECK-LABEL: define dso_local { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_svluti4_lane_zt_s32
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_s32_x4(0, zn, 1);
@@ -239,36 +107,14 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
 // CHECK-LABEL: define dso_local { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_svluti4_lane_zt_f32
 // CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t
 // CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN]], i32 1)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
   return svluti4_lane_zt_f32_x4(0, zn, 1);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
index efc68c0b42334..5d57ffb9bdf8c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
@@ -18,27 +18,13 @@
 // CHECK-LABEL: @test_svmax_single_s8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x210svint8x2_tu10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]],
i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s8_x2)(zdn, zm); @@ -46,27 +32,13 @@ svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x2)(zdn, zm); @@ -74,27 +46,13 @@ svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x2)(zdn, zm); @@ -102,27 +60,13 @@ svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] 
= alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x2( // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x2)(zdn, zm); @@ -326,27 +172,13 @@ svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t 
test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, 
align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// 
CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm);
@@ -644,35 +300,13 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str
 // CHECK-LABEL: @test_svmax_single_bf16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm);
@@ -680,35 +314,13 @@ svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __
 // CHECK-LABEL: @test_svmax_single_f16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.single.x4.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZDN_COERCE2:%.*]], <vscale x 8 x half> [[ZDN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x413svfloat16x4_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmax.single.x4.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZDN_COERCE2:%.*]], <vscale x 8 x half> [[ZDN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_f16_x4)(zdn, zm);
@@ -716,35 +328,13 @@ svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_
 // CHECK-LABEL: @test_svmax_single_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.single.x4.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZDN_COERCE2:%.*]], <vscale x 4 x float> [[ZDN_COERCE3:%.*]], <vscale x 4 x float> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x413svfloat32x4_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmax.single.x4.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZDN_COERCE2:%.*]], <vscale x 4 x float> [[ZDN_COERCE3:%.*]], <vscale x 4 x float> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_f32_x4)(zdn, zm);
@@ -752,35 +342,13 @@ svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_
 // CHECK-LABEL: @test_svmax_single_f64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.single.x4.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZDN_COERCE2:%.*]], <vscale x 2 x double> [[ZDN_COERCE3:%.*]], <vscale x 2 x double> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x413svfloat64x4_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.single.x4.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZDN_COERCE2:%.*]], <vscale x 2 x double> [[ZDN_COERCE3:%.*]], <vscale x 2 x double> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_f64_x4)(zdn, zm);
@@ -790,27 +358,13 @@ svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_
 // CHECK-LABEL: @test_svmax_s8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x210svint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s8_x2)(zdn, zm);
@@ -818,27 +372,13 @@ svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x211svint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s16_x2)(zdn, zm);
@@ -846,27 +386,13 @@ svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x211svint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s32_x2)(zdn, zm);
@@ -874,27 +400,13 @@ svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm)
__arm_streaming { // CHECK-LABEL: @test_svmax_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { 
, } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], 
[[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm); @@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x2)(zdn, zm); @@ -1070,27 +498,13 @@ svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } 
[[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x2)(zdn, zm); @@ -1098,27 +512,13 @@ svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x2)(zdn, zm); @@ -1128,35 +528,13 @@ svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x4)(zdn, zm); @@ -1164,35 +542,13 @@ svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s16_x4)(zdn, zm); @@ -1200,35 +556,13 @@ svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s32_x4)(zdn, zm); @@ -1236,35 +570,13 @@ svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t 
test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x4)(zdn, zm); @@ -1272,35 +584,13 @@ svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x4)(zdn, zm); @@ -1308,35 +598,13 @@ svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x4)(zdn, zm); @@ -1344,35 +612,13 @@ svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 
16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x4)(zdn, zm); @@ -1380,35 +626,13 @@ svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { 
, , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm); @@ -1416,35 +640,13 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// 
CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x4)(zdn, zm); @@ -1452,35 +654,13 @@ svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x4)(zdn, zm); @@ -1488,35 +668,13 @@ svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , 
} [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x4)(zdn, zm); @@ -1524,35 +682,13 @@ svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
[[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c index 5d06895497cc7..1d47abe8d487c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svmaxnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, 
[[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x2)(zdn, zm); @@ -133,35 +77,13 @@ svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x4)(zdn, zm); @@ -169,35 +91,13 @@ svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x4)(zdn, zm); @@ -205,35 +105,13 @@ svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , 
, , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x4)(zdn, zm); @@ -241,35 +119,13 @@ svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x4)(zdn, zm); @@ -279,27 +135,13 @@ svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x2)(zdn, zm); @@ -307,27 +149,13 @@ svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x2)(zdn, zm); @@ -335,27 +163,13 @@ svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) -// CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]] +// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]] +// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]] // svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x2)(zdn, zm); @@ -363,27 +177,13 @@ svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2) -// CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]] +// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]] +// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]] //
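Every hunk in these generated tests records the same simplification: previously the intrinsic's results were unpacked with extractvalue, repacked into one wide scalable vector via llvm.vector.insert, spilled to the RETVAL alloca, and loaded back as a struct before the ret; the updated checks expect the aggregate returned by the call to be returned directly. A minimal self-contained sketch of the new shape for the f64 x2 case above (the function and value names here are illustrative, not taken from the patch):

  declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

  define { <vscale x 2 x double>, <vscale x 2 x double> } @sketch_fmaxnm_x2(<vscale x 2 x double> %zdn0, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zm0, <vscale x 2 x double> %zm1) {
  entry:
    ; the intrinsic already yields the two-vector tuple as a literal struct
    %res = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(<vscale x 2 x double> %zdn0, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zm0, <vscale x 2 x double> %zm1)
    ; ...so it can be returned as-is, with no alloca round-trip
    ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
  }

The old store/load pair existed only to coerce the wide insert-chain vector back to the tuple type through memory, so returning the struct directly removes the stack traffic entirely rather than merely reshuffling it.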
svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x2)(zdn, zm); @@ -393,35 +193,13 @@ svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x4)(zdn, zm); @@ -429,35 +207,13 @@ svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) 
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , 
} [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmaxnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c index 2fa7feeee404e..4e70a39311664 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c @@ -18,27 +18,13 @@ // CHECK-LABEL: @test_svmin_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x2)(zdn, zm); @@ -46,27 +32,13 @@ svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x2)(zdn, zm); @@ -74,27 +46,13 @@ svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x2)(zdn, zm); @@ -102,27 +60,13 @@ svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: 
@test_svmin_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN_COERCE0:%.*]], <vscale x 2 x i64> [[ZDN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2) -// CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]] +// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN_COERCE0:%.*]], <vscale x 2 x i64> [[ZDN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]] +// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16) -// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]] +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]] +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] //
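The deleted lines in each hunk are all instances of one epilogue template. Written out as a self-contained function for the u8 x2 case that follows (names are illustrative; the types are spelled out from the nxv16i8/nxv32i8 manglings), what the old checks required was:

  declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
  declare <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8>, <vscale x 16 x i8>, i64)

  define { <vscale x 16 x i8>, <vscale x 16 x i8> } @old_epilogue_umin_x2(<vscale x 16 x i8> %zdn0, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zm) {
  entry:
    %retval = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
    %res = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn0, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zm)
    ; unpack both tuple elements
    %e0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 0
    %e1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %res, 1
    ; repack into one nxv32i8; vector.insert offsets are element indices, so the
    ; second element lands at 16, the minimum element count of <vscale x 16 x i8>
    %w0 = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %e0, i64 0)
    %w1 = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %w0, <vscale x 16 x i8> %e1, i64 16)
    ; round-trip through the stack purely to coerce nxv32i8 back to the tuple type
    store <vscale x 32 x i8> %w1, ptr %retval, align 16
    %reload = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr %retval, align 16
    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %reload
  }

This is the pattern the updated checks drop for every element type; across the hunks only the intrinsic name and the insert offsets vary (16, 8, 4, 2 for the i8/i16/i32/i64 second halves, and likewise per element count for the floating-point variants).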
svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z24test_svmin_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x2)(zdn, zm); @@ -326,27 +172,13 @@ svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , 
} [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t 
test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue 
{ , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: 
entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], 
i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x4)(zdn, zm); @@ -790,27 +358,13 @@ svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x2)(zdn, zm); @@ -818,27 +372,13 @@ svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x2)(zdn, zm); @@ -846,27 +386,13 @@ svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x2)(zdn, zm); @@ -874,27 +400,13 @@ svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return 
   return SVE_ACLE_FUNC(svmin,_bf16_x2)(zdn, zm);

[The hunks @@ -1042,27 +484,13 @@, @@ -1070,27 +498,13 @@ and @@ -1098,27 +512,13 @@ make the identical update for test_svmin_f16_x2, test_svmin_f32_x2 and test_svmin_f64_x2: the [[RETVAL]] alloca, the extractvalue/@llvm.vector.insert chain and the store/load round-trip through [[RETVAL]] are deleted, and each function returns the intrinsic's two-vector struct result directly via "ret { ... } [[TMP0]]".]

@@ -1128,35 +528,13 @@ svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZDN_COERCE2:%.*]], <vscale x 16 x i8> [[ZDN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x410svint8x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZDN_COERCE2:%.*]], <vscale x 16 x i8> [[ZDN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s8_x4)(zdn, zm);

[Eleven further hunks repeat the same x4 rewrite for test_svmin_s16_x4, test_svmin_s32_x4, test_svmin_s64_x4, test_svmin_u8_x4, test_svmin_u16_x4, test_svmin_u32_x4, test_svmin_u64_x4, test_svmin_bf16_x4, test_svmin_f16_x4, test_svmin_f32_x4 and test_svmin_f64_x4, with the element type and vector-insert indices adjusted per type; the file's final context lines are:]

 svfloat64x4_t test_svmin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c
index 71b8914b816ca..838cb644e5e39 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c
@@ -19,27 +19,13 @@
 // CHECK-LABEL: @test_svminnm_single_bf16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svminnm,_single_bf16_x2)(zdn, zm);

[The same update is applied by the hunks for test_svminnm_single_f16_x2, test_svminnm_single_f32_x2 and test_svminnm_single_f64_x2, the single-operand x4 variants test_svminnm_single_bf16_x4, test_svminnm_single_f16_x4, test_svminnm_single_f32_x4 and test_svminnm_single_f64_x4, the multi-vector variants test_svminnm_multi_bf16_x2, test_svminnm_multi_f16_x2, test_svminnm_multi_f32_x2 and test_svminnm_multi_f64_x2, and test_svminnm_multi_bf16_x4.]

@@ -429,35 +207,13 @@ svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
 // CHECK-LABEL: @test_svminnm_multi_f16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZDN_COERCE2:%.*]], <vscale x 8 x half> [[ZDN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x half> [[TMP8]], ptr [[RETVAL]], align 16
-//
CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
//
svfloat64x4_t test_svminnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svminnm,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
index da17c6b13d17c..b8cd1e1653ea9 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
@@ -9,27 +9,13 @@
// CHECK-LABEL: @test_svread_ver_za8_u8_vg2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
  return svread_ver_za8_u8_vg2(0, base);
@@ -37,27 +23,13 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("
// CHECK-LABEL: @test_svread_ver_za8_s8_vg2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
  return svread_ver_za8_s8_vg2(0, base);
@@ -65,27 +37,13 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z
// CHECK-LABEL: @test_svread_hor_za8_u8_vg2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
  return svread_hor_za8_u8_vg2(0, base);
@@ -93,27 +51,13 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("
// CHECK-LABEL: @test_svread_hor_za8_s8_vg2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg2(0, base); @@ -121,35 +65,13 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg4(0, base); @@ -157,35 +79,13 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_hor_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); @@ -193,35 +93,13 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_ver_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z26test_svread_ver_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); @@ -229,35 +107,13 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg4(0, base); @@ -265,27 +121,13 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg2(1, base); @@ -293,27 +135,13 @@ svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg2(1, base); @@ -321,27 +149,13 @@ svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg2(1, base); @@ -349,27 +163,13 @@ svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg2(1, base); @@ -377,27 +177,13 @@ svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg2(1, base); @@ -405,27 +191,13 @@ svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg2(1, base); @@ -433,27 +205,13 @@ svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg2(1, base); @@ -461,27 +219,13 @@ svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg2(1, base); @@ -489,35 +233,13 @@ svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg4(1, base); @@ -525,35 +247,13 @@ svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg4(1, base); @@ -561,35 +261,13 @@ svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg4(1, base); @@ -597,35 +275,13 @@ svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg4(1, base); @@ -633,35 +289,13 @@ svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg4(1, base); @@ -669,35 +303,13 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@@ -669,35 +303,13 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_ver_za16_bf16_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za16_bf16_vg4(1, base);
@@ -705,35 +317,13 @@ svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __ar
 // CHECK-LABEL: @test_svread_ver_za16_f16_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za16_f16_vg4(1, base);
@@ -741,35 +331,13 @@ svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_ver_za16_s16_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za16_s16_vg4(1, base);
@@ -777,27 +345,13 @@ svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_hor_za32_u32_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za32_u32_vg2(3, base);
@@ -805,27 +359,13 @@ svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_hor_za32_f32_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za32_f32_vg2(3, base);
@@ -833,27 +373,13 @@ svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_hor_za32_s32_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za32_s32_vg2(3, base);
@@ -861,27 +387,13 @@ svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_ver_za32_u32_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za32_u32_vg2(3, base);
@@ -889,27 +401,13 @@ svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_ver_za32_f32_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za32_f32_vg2(3, base);
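As a second hedged sketch (same assumptions as the one above; the helper name cross_sum is hypothetical), the vg2 forms return two-vector tuples, and a horizontal and a vertical read of the same 32-bit tile can be combined with ordinary streaming-compatible SVE arithmetic:

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical helper: add the first vector of a horizontal two-slice
// read of tile ZA3 to the first vector of the corresponding vertical read.
svuint32_t cross_sum(uint32_t base) __arm_streaming __arm_in("za") {
  svuint32x2_t h = svread_hor_za32_u32_vg2(3, base);
  svuint32x2_t v = svread_ver_za32_u32_vg2(3, base);
  return svadd_u32_x(svptrue_b32(), svget2_u32(h, 0), svget2_u32(v, 0));
}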
@@ -917,27 +415,13 @@ svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_ver_za32_s32_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za32_s32_vg2(3, base);
@@ -945,35 +429,13 @@ svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_hor_za32_u32_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za32_u32_vg4(3, base);
@@ -981,35 +443,13 @@ svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_hor_za32_f32_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za32_f32_vg4(3, base);
@@ -1017,35 +457,13 @@ svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_hor_za32_s32_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za32_s32_vg4(3, base);
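One more hedged sketch for the vg4 tuples (same assumptions as above; the helper name quad_sum is hypothetical): all four vectors of a four-slice read can be reduced with the svget4 accessors and streaming-compatible SVE adds.

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical helper: reduce the four vectors of a vg4 read of tile ZA3
// into a single vector sum.
svint32_t quad_sum(uint32_t base) __arm_streaming __arm_in("za") {
  svint32x4_t t = svread_hor_za32_s32_vg4(3, base);
  svint32_t acc = svadd_s32_x(svptrue_b32(), svget4_s32(t, 0), svget4_s32(t, 1));
  acc = svadd_s32_x(svptrue_b32(), acc, svget4_s32(t, 2));
  return svadd_s32_x(svptrue_b32(), acc, svget4_s32(t, 3));
}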
@@ -1053,35 +471,13 @@ svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_ver_za32_u32_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za32_u32_vg4(3, base);
@@ -1089,35 +485,13 @@ svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_ver_za32_f32_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za32_f32_vg4(3, base);
@@ -1125,35 +499,13 @@ svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_ver_za32_s32_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za32_s32_vg4(3, base);
@@ -1161,27 +513,13 @@ svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_hor_za64_u64_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za64_u64_vg2(7, base);
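The za64 tests that follow use tile index 7, the highest of the eight 64-bit tiles. As a hedged sketch (same assumptions as earlier; row_sum is a hypothetical helper, and svdup_n_f64/svadd_f64_x are the standard SVE ACLE forms), slices can be accumulated two at a time with the vg2 read:

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical helper: accumulate the first n horizontal slices of the
// 64-bit tile ZA7, two slices per iteration.
svfloat64_t row_sum(uint32_t n) __arm_streaming __arm_in("za") {
  svfloat64_t acc = svdup_n_f64(0.0);
  for (uint32_t i = 0; i + 1 < n; i += 2) {
    svfloat64x2_t t = svread_hor_za64_f64_vg2(7, i);
    acc = svadd_f64_x(svptrue_b64(), acc, svget2_f64(t, 0));
    acc = svadd_f64_x(svptrue_b64(), acc, svget2_f64(t, 1));
  }
  return acc;
}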
@@ -1189,27 +527,13 @@ svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_hor_za64_f64_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za64_f64_vg2(7, base);
@@ -1217,27 +541,13 @@ svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_hor_za64_s64_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za64_s64_vg2(7, base);
@@ -1245,27 +555,13 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_ver_za64_u64_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za64_u64_vg2(7, base);
@@ -1273,55 +569,28 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_ver_za64_f64_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za64_f64_vg2(7, base);
 }
+//
 // CHECK-LABEL: @test_svread_ver_za64_s64_vg2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za64_s64_vg2(7, base);
@@ -1329,35 +598,13 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_hor_za64_u64_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za64_u64_vg4(7, base);
@@ -1365,35 +612,13 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_hor_za64_f64_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za64_f64_vg4(7, base);
@@ -1401,35 +626,13 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_hor_za64_s64_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za64_s64_vg4(7, base);
@@ -1437,35 +640,13 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_ver_za64_u64_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za64_u64_vg4(7, base);
@@ -1473,35 +654,13 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i
 // CHECK-LABEL: @test_svread_ver_za64_f64_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za64_f64_vg4(7, base);
@@ -1509,35 +668,13 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_
 // CHECK-LABEL: @test_svread_ver_za64_s64_vg4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za64_s64_vg4(7, base);
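The remaining tests exercise the za_vg1x2 form, which addresses ZA as whole vector groups rather than tile slices and therefore takes only a base vector-group index, with no tile number or hor/ver direction. A hedged sketch under the same assumptions as before (the helper name second_group is hypothetical):

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical helper: read two consecutive ZA vector groups of signed
// bytes and return the second one.
svint8_t second_group(uint32_t base) __arm_streaming __arm_in("za") {
  svint8x2_t t = svread_za8_s8_vg1x2(base);
  return svget2_s8(t, 1);
}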
[[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); @@ -1573,27 +696,13 @@ svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); @@ -1601,56 +710,27 @@ svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } -// // CHECK-LABEL: @test_svread_za16_u16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); @@ -1658,27 +738,13 @@ svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// 
CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); @@ -1686,27 +752,13 @@ svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); @@ -1714,27 +766,13 @@ svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); @@ -1742,27 +780,13 @@ svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x2(base); @@ -1770,27 +794,13 @@ svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); @@ -1798,27 +808,13 @@ svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); @@ -1826,27 +822,13 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); @@ -1854,27 +836,13 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); @@ -1882,35 +850,13 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za8_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); @@ -1918,35 +864,13 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// 
CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); @@ -1954,35 +878,13 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 
24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); @@ -1990,35 +892,13 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za16_u16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); @@ -2026,35 +906,13 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x4(base); @@ -2062,35 +920,13 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x4(base); @@ -2098,35 +934,13 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x4(base); @@ -2134,35 +948,13 @@ 
svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x4(base); @@ -2170,35 +962,13 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) 
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x4(base); @@ -2206,35 +976,13 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x4(base); @@ -2242,35 +990,13 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x4(base); @@ -2278,35 +1004,13 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x4(base); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c index 26804866a7563..5ff801666df88 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svqdmulh_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] 
= extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s8_x2,,,)(zdn, zm);
@@ -47,27 +33,13 @@ svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streami
// CHECK-LABEL: @test_svqdmulh_single_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x211svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s16_x2,,,)(zdn, zm);
@@ -75,27 +47,13 @@ svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_str
// CHECK-LABEL: @test_svqdmulh_single_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x211svint32x2_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s32_x2,,,)(zdn, zm);
@@ -103,27 +61,13 @@ svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_str
// CHECK-LABEL: @test_svqdmulh_single_s64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x211svint64x2_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s64_x2,,,)(zdn, zm);
@@ -133,35 +77,13 @@ svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_str
// CHECK-LABEL: @test_svqdmulh_single_s8_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x410svint8x4_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s8_x4,,,)(zdn, zm);
@@ -169,35 +91,13 @@ svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streami
// CHECK-LABEL: @test_svqdmulh_single_s16_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x411svint16x4_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s16_x4,,,)(zdn, zm);
@@ -205,35 +105,13 @@ svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_str
// CHECK-LABEL: @test_svqdmulh_single_s32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x411svint32x4_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s32_x4,,,)(zdn, zm);
@@ -241,35 +119,13 @@ svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_str
// CHECK-LABEL: @test_svqdmulh_single_s64_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x411svint64x4_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_single_s64_x4,,,)(zdn, zm);
@@ -279,27 +135,13 @@ svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_str
// CHECK-LABEL: @test_svqdmulh_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x210svint8x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s8_x2,,,)(zdn, zm);
@@ -307,27 +149,13 @@ svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
// CHECK-LABEL: @test_svqdmulh_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x211svint16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s16_x2,,,)(zdn, zm);
@@ -335,27 +163,13 @@ svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streamin
// CHECK-LABEL: @test_svqdmulh_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x211svint32x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s32_x2,,,)(zdn, zm);
@@ -363,27 +177,13 @@ svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streamin
// CHECK-LABEL: @test_svqdmulh_s64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x211svint64x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s64_x2,,,)(zdn, zm);
@@ -393,35 +193,13 @@ svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streamin
// CHECK-LABEL: @test_svqdmulh_s8_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x410svint8x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s8_x4,,,)(zdn, zm);
@@ -429,35 +207,13 @@ svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming {
// CHECK-LABEL: @test_svqdmulh_s16_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x411svint16x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s16_x4,,,)(zdn, zm);
@@ -465,35 +221,13 @@ svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streamin
// CHECK-LABEL: @test_svqdmulh_s32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x411svint32x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s32_x4,,,)(zdn, zm);
@@ -501,35 +235,13 @@ svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streamin
// CHECK-LABEL: @test_svqdmulh_s64_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x411svint64x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint64x4_t test_svqdmulh_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svqdmulh,_s64_x4,,,)(zdn, zm);
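
Every hunk above makes the same simplification: the tuple produced by the sqdmulh intrinsic is now returned directly instead of being round-tripped through the RETVAL alloca. For readers without the SME2 ACLE at hand, a minimal sketch of the C-level call these tests exercise, assuming <arm_sme.h> and the non-overloaded names the SVE_ACLE_FUNC macro expands to; this sketch is not part of the patch:

// Sketch, not from the patch: saturating doubling multiply-high across a
// two-vector tuple, computed by a single SME2 call in streaming mode.
#include <arm_sme.h>

svint16x2_t qdmulh_pair(svint16x2_t zdn, svint16_t zm) __arm_streaming {
  return svqdmulh_single_s16_x2(zdn, zm);  // multiplies both halves by zm
}
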
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c
index fa66c4ff19014..d3b09f071c58f 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c
@@ -19,27 +19,13 @@
// CHECK-LABEL: @test_svunpk_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sunpk.x2.nxv8i16(<vscale x 16 x i8> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sunpk.x2.nxv8i16(<vscale x 16 x i8> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_s16,_s8_x2)(zn);
@@ -47,27 +33,13 @@ svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_u16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uunpk.x2.nxv8i16(<vscale x 16 x i8> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x2u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uunpk.x2.nxv8i16(<vscale x 16 x i8> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_u16,_u8_x2)(zn);
@@ -75,27 +47,13 @@ svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sunpk.x2.nxv4i32(<vscale x 8 x i16> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT: store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x2u11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sunpk.x2.nxv4i32(<vscale x 8 x i16> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_s32,_s16_x2)(zn);
@@ -103,27 +61,13 @@ svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_u32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uunpk.x2.nxv4i32(<vscale x 8 x i16> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT: store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x2u12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uunpk.x2.nxv4i32(<vscale x 8 x i16> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_u32,_u16_x2)(zn);
@@ -131,27 +75,13 @@ svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_s64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sunpk.x2.nxv2i64(<vscale x 4 x i32> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x2u11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sunpk.x2.nxv2i64(<vscale x 4 x i32> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_s64,_s32_x2)(zn);
@@ -159,27 +89,13 @@ svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_u64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uunpk.x2.nxv2i64(<vscale x 4 x i32> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x2u12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uunpk.x2.nxv2i64(<vscale x 4 x i32> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svunpk_u64_x2(svuint32_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_u64,_u32_x2)(zn);
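
The unpack tests follow the same pattern: the widening unpack now returns its x2 tuple directly. A minimal sketch of the corresponding C call, under the same assumptions as the sketch above:

// Sketch, not from the patch: sign-extending unpack of one vector into a
// pair of double-width vectors (low half in element 0, high half in 1).
#include <arm_sme.h>

svint32x2_t unpk_pair(svint16_t zn) __arm_streaming {
  return svunpk_s32_s16_x2(zn);
}
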
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c
index 61718f0984ef3..45bc83eac7339 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c
@@ -19,35 +19,13 @@
// CHECK-LABEL: @test_svunpk_s16_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x410svint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_s16,_s8_x4)(zn);
@@ -55,35 +33,13 @@ svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_u16_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x411svuint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_u16,_u8_x4)(zn);
@@ -91,35 +47,13 @@ svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_s32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x411svint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_s32,_s16_x4)(zn);
@@ -127,35 +61,13 @@ svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_u32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x412svuint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_u32,_u16_x4)(zn);
@@ -163,35 +75,13 @@ svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_s64_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x411svint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_s64,_s32_x4)(zn);
@@ -199,35 +89,13 @@ svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svunpk_u64_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x412svuint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint64x4_t test_svunpk_u64_x4(svuint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svunpk_u64,_u32_x4)(zn);
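
The final file updates the vector-add tests the same way: the aggregate produced by llvm.aarch64.sve.add.single.x2 becomes the return value directly, with no store/load through RETVAL. A minimal sketch of the C entry point, under the same assumptions as the earlier sketches:

// Sketch, not from the patch: add one vector to each element of a tuple.
#include <arm_sme.h>

svint8x2_t add_pair(svint8x2_t zn, svint8_t zm) __arm_streaming {
  return svadd_single_s8_x2(zn, zm);  // zm is added to zn[0] and zn[1]
}
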
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c
index c118a7192c6ca..de983bcf79309 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c
@@ -25,27 +25,13 @@
// CHECK-LABEL: @test_svadd_vector_single2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm);
@@ -53,27 +39,13 @@ svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_stream
// CHECK-LABEL: @test_svadd_vector_single2_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm);
@@ -81,27 +53,13 @@ svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_str
// CHECK-LABEL: @test_svadd_vector_single2_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm);
@@ -109,27 +67,13 @@ svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_st
// CHECK-LABEL: @test_svadd_vector_single2_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm);
@@ -137,27 +81,13 @@ svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm
// CHECK-LABEL: @test_svadd_vector_single2_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm);
@@ -165,27 +95,13 @@ svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_st
// CHECK-LABEL: @test_svadd_vector_single2_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); @@ -193,27 +109,13 @@ svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); @@ -221,27 +123,13 @@ svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); @@ -252,35 +140,13 @@ svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); @@ -288,35 +154,13 @@ svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single4_u8( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); @@ -324,35 +168,13 @@ svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue 
{ , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); @@ -360,35 +182,13 @@ svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); @@ -396,35 +196,13 @@ svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); @@ -432,35 +210,13 @@ svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); @@ -468,35 +224,13 @@ svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); @@ -504,35 +238,13 @@ svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c
index 87160444e3c0d..af5a389c7f736 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c
@@ -19,27 +19,13 @@
 // CHECK-LABEL: @test_svrshl_single_s8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x210svint8x2_tu10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s8_x2,,,)(zdn, zm);
@@ -47,27 +33,13 @@ svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming
 // CHECK-LABEL: @test_svrshl_single_s16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x211svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s16_x2,,,)(zdn, zm);
@@ -75,27 +47,13 @@ svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x211svint32x2_tu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:
store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x2,,,)(zdn, zm); @@ -103,27 +61,13 @@ svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x2,,,)(zdn, zm); @@ -131,27 +75,13 @@ svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x2,,,)(zdn, zm); @@ -159,27 +89,13 @@ svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x2,,,)(zdn, zm); @@ -187,27 +103,13 @@ svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// 
CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x2,,,)(zdn, zm); @@ -215,27 +117,13 @@ svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x2,,,)(zdn, zm); @@ -245,35 +133,13 @@ svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x4,,,)(zdn, zm); @@ -281,35 +147,13 @@ svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x4,,,)(zdn, zm); @@ -317,35 +161,13 @@ svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x4,,,)(zdn, zm); @@ -353,35 +175,13 @@ svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x4,,,)(zdn, zm); @@ -389,35 +189,13 @@ svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x4,,,)(zdn, zm); @@ -425,35 +203,13 @@ svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x4,,,)(zdn, zm); @@ -461,35 +217,13 @@ svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x4,,,)(zdn, zm); @@ -497,35 +231,13 @@ svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// 
CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x4,,,)(zdn, zm); @@ -535,27 +247,13 @@ svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_multi_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x2,,,)(zdn, zm); @@ -563,27 +261,13 @@ svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x2,,,)(zdn, zm); @@ -591,27 +275,13 @@ svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x2,,,)(zdn, zm); @@ -619,27 +289,13 @@ svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x2,,,)(zdn, zm); @@ -647,27 +303,13 @@ svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x2,,,)(zdn, zm); @@ -675,27 +317,13 @@ svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x2,,,)(zdn, zm); @@ -703,27 +331,13 @@ svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x2,,,)(zdn, zm); @@ -731,27 +345,13 @@ svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x2,,,)(zdn, zm); @@ -761,35 +361,13 @@ svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x4,,,)(zdn, zm); @@ -797,35 +375,13 @@ svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t 
test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x4,,,)(zdn, zm); @@ -833,35 +389,13 @@ svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x4,,,)(zdn, zm); @@ -869,35 +403,13 @@ svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x4,,,)(zdn, zm); @@ -905,35 +417,13 @@ svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { 
, , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x4,,,)(zdn, zm); @@ -941,35 +431,13 @@ svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, 
[[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x4,,,)(zdn, zm); @@ -977,35 +445,13 @@ svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x4,,,)(zdn, zm); @@ -1013,35 +459,13 @@ svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_multi_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c index a95f89faf7783..4047b2fbd1965 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x2u11__SVCount_t10svint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x2)(pn, zn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_st // CHECK-LABEL: @test_svsel_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x2u11__SVCount_t11svuint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x2)(pn, zn, zm); @@ -77,27 +49,13 @@ svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm // CHECK-LABEL: @test_svsel_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x2u11__SVCount_t11svint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x2)(pn, zn, zm); @@ -105,27 +63,13 @@ svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __ar // CHECK-LABEL: @test_svsel_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x2u11__SVCount_t12svuint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], 
[[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x2)(pn, zn, zm); @@ -133,27 +77,13 @@ svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) _ // CHECK-LABEL: @test_svsel_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x2u11__SVCount_t13svfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x2)(pn, zn, zm); @@ -161,27 +91,13 @@ svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm // CHECK-LABEL: @test_svsel_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// 
CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x2u11__SVCount_t14svbfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x2)(pn, zn, zm); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_ // CHECK-LABEL: @test_svsel_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x2u11__SVCount_t11svint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x2)(pn, zn, zm); @@ -219,27 +121,13 @@ svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __ar // CHECK-LABEL: @test_svsel_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x2u11__SVCount_t12svuint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x2)(pn, zn, zm); @@ -247,27 +135,13 @@ svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) _ // CHECK-LABEL: @test_svsel_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x2u11__SVCount_t13svfloat32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x2)(pn, zn, zm); @@ -277,27 +151,13 @@ svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm // CHECK-LABEL: @test_svsel_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x2u11__SVCount_t11svint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x2)(pn, zn, zm); @@ -305,27 +165,13 @@ svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __ar // CHECK-LABEL: @test_svsel_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x2u11__SVCount_t12svuint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// 
CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_u64_x2)(pn, zn, zm);
@@ -333,27 +179,13 @@ svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) _
 // CHECK-LABEL: @test_svsel_f64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x2u11__SVCount_t13svfloat64x2_tS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x2_t test_svsel_f64_x2(svcount_t pn, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_f64_x2)(pn, zn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c
index 997b6acf96244..871d70943c9df 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c
@@ -19,35 +19,13 @@
 // CHECK-LABEL: @test_svsel_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[ZN1_COERCE0:%.*]], <vscale x 16 x i8> [[ZN1_COERCE1:%.*]], <vscale x 16 x i8> [[ZN1_COERCE2:%.*]], <vscale x 16 x i8> [[ZN1_COERCE3:%.*]], <vscale x 16 x i8> [[ZN2_COERCE0:%.*]], <vscale x 16 x i8> [[ZN2_COERCE1:%.*]], <vscale x 16 x i8> [[ZN2_COERCE2:%.*]], <vscale x 16 x i8> [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x4u11__SVCount_t10svint8x4_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x4)(pn, zn1, zn2); @@ -55,35 +33,13 @@ svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_ // CHECK-LABEL: @test_svsel_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], 
i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x4u11__SVCount_t11svuint8x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svuint8x4_t zn3, svuint8x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x4)(pn, zn1, zn2); @@ -93,35 +49,13 @@ svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svu // CHECK-LABEL: @test_svsel_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x4u11__SVCount_t11svint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], 
[[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, svint16x4_t zn3, svint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x4)(pn, zn1, zn2); @@ -129,35 +63,13 @@ svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, sv // CHECK-LABEL: @test_svsel_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x4u11__SVCount_t12svuint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, svuint16x4_t zn3, svuint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x4)(pn, zn1, zn2); @@ -165,35 +77,13 @@ svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, // CHECK-LABEL: @test_svsel_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x4u11__SVCount_t13svfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t 
test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t zn2, svfloat16x4_t zn3, svfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x4)(pn, zn1, zn2); @@ -201,35 +91,13 @@ svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t z // CHECK-LABEL: @test_svsel_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x4u11__SVCount_t14svbfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4_t zn2, svbfloat16x4_t zn3, svbfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x4)(pn, zn1, zn2); @@ -239,35 +107,13 @@ svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4 // CHECK-LABEL: @test_svsel_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") 
[[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x4u11__SVCount_t11svint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, svint32x4_t zn3, svint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x4)(pn, zn1, zn2); @@ -275,35 +121,13 @@ svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, sv // CHECK-LABEL: @test_svsel_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x4u11__SVCount_t12svuint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, svuint32x4_t zn3, svuint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x4)(pn, zn1, zn2); @@ -311,35 +135,13 @@ svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, // CHECK-LABEL: @test_svsel_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z17test_svsel_f32_x4u11__SVCount_t13svfloat32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t zn2, svfloat32x4_t zn3, svfloat32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x4)(pn, zn1, zn2); @@ -349,35 +151,13 @@ svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t z // CHECK-LABEL: @test_svsel_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x4u11__SVCount_t11svint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, svint64x4_t zn3, svint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x4)(pn, zn1, zn2); @@ -385,35 +165,13 @@ svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, sv // CHECK-LABEL: @test_svsel_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x4u11__SVCount_t12svuint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
<vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, svuint64x4_t zn3, svuint64x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_u64_x4)(pn, zn1, zn2);
@@ -421,35 +179,13 @@ svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2,
 // CHECK-LABEL: @test_svsel_f64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], <vscale x 2 x double> [[ZN1_COERCE0:%.*]], <vscale x 2 x double> [[ZN1_COERCE1:%.*]], <vscale x 2 x double> [[ZN1_COERCE2:%.*]], <vscale x 2 x double> [[ZN1_COERCE3:%.*]], <vscale x 2 x double> [[ZN2_COERCE0:%.*]], <vscale x 2 x double> [[ZN2_COERCE1:%.*]], <vscale x 2 x double> [[ZN2_COERCE2:%.*]], <vscale x 2 x double> [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x4u11__SVCount_t13svfloat64x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], <vscale x 2 x double> [[ZN1_COERCE0:%.*]], <vscale x 2 x double> [[ZN1_COERCE1:%.*]], <vscale x 2 x double> [[ZN1_COERCE2:%.*]], <vscale x 2 x double> [[ZN1_COERCE3:%.*]], <vscale x 2 x double> [[ZN2_COERCE0:%.*]], <vscale x 2 x double> [[ZN2_COERCE1:%.*]], <vscale x 2 x double> [[ZN2_COERCE2:%.*]], <vscale x 2 x double> [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x4_t test_svsel_f64_x4(svcount_t pn, svfloat64x4_t zn1, svfloat64x4_t zn2, svfloat64x4_t zn3, svfloat64x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_f64_x4)(pn, zn1, zn2);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c
index de605bab67cc3..9a66ee5262082 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c
@@ -20,27 +20,13 @@
 // CHECK-LABEL: @test_svuzp_s8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x210svint8x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_s8_x2)(zn);
@@ -48,27 +34,13 @@ svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svuzp_u8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x211svuint8x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr
[[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_u8_x2)(zn);
@@ -78,27 +50,13 @@ svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svuzp_s16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x211svint16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_s16_x2)(zn);
@@ -106,27 +64,13 @@ svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svuzp_u16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x212svuint16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_u16_x2)(zn);
@@ -134,27 +78,13 @@ svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svuzp_f16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.uzp.x2.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x213svfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.uzp.x2.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_f16_x2)(zn);
@@ -162,27 +92,13 @@ svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svuzp_bf16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.uzp.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x214svbfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.uzp.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_bf16_x2)(zn);
@@ -192,27 +108,13 @@ svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svuzp_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x2.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x211svint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x2.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svuzp,_s32_x2)(zn);
@@ -220,27 +122,13 @@ svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svuzp_u32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x2.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x212svuint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x2.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-//
CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x2)(zn); @@ -248,27 +136,13 @@ svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x2)(zn); @@ -278,27 +152,13 @@ svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
[[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x2)(zn); @@ -306,27 +166,13 @@ svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x2)(zn); @@ -334,27 +180,13 @@ svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x2)(zn); @@ -364,27 +196,13 @@ svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x2)(zn); @@ -392,27 +210,13 @@ svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x2)(zn); @@ -420,27 +224,13 @@ svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x2)(zn); @@ -448,27 +238,13 @@ svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
} [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x2)(zn); @@ -476,27 +252,13 @@ svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x2)(zn); @@ -504,27 +266,13 @@ svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, 
[[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x2)(zn); @@ -532,27 +280,13 @@ svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x2)(zn); @@ -560,27 +294,13 @@ svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x2)(zn); @@ -588,27 +308,13 @@ svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x2)(zn); @@ -616,27 +322,13 @@ svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 
0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x2)(zn); @@ -644,27 +336,13 @@ svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x2)(zn); @@ -672,27 +350,13 @@ svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzpq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c index aa210f59508b5..131928615edcd 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c @@ -20,35 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x4)(zn); @@ 
-56,35 +34,13 @@ svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x4)(zn); @@ -94,35 +50,13 @@ svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x4)(zn); @@ -130,35 +64,13 @@ svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x4)(zn); @@ -166,35 +78,13 @@ svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x4)(zn); @@ -202,35 +92,13 @@ svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x4( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x4)(zn); @@ -240,35 +108,13 @@ svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x4)(zn); @@ -276,35 +122,13 @@ svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( 
[[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x4)(zn); @@ -312,35 +136,13 @@ svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x4)(zn); @@ -350,35 +152,13 @@ svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , 
}, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x4)(zn); @@ -386,35 +166,13 @@ svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
[[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x4)(zn); @@ -422,35 +180,13 @@ svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x4)(zn); @@ -460,35 +196,13 @@ svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x4)(zn); @@ -496,35 +210,13 @@ svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], 
[[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x4)(zn); @@ -532,35 +224,13 @@ svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x4)(zn); @@ -568,35 +238,13 @@ svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x4)(zn); @@ -604,35 +252,13 @@ svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x4)(zn); @@ -640,35 +266,13 @@ svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x4)(zn); @@ -676,35 +280,13 @@ svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z18test_svuzpq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x4)(zn); @@ -712,35 +294,13 @@ svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x4)(zn); @@ -748,35 +308,13 @@ svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x4)(zn); @@ -784,35 +322,13 @@ svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x4)(zn); @@ -820,35 +336,13 @@ svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x4)(zn); @@ -856,35 +350,13 @@ svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], 
i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat64x4_t test_svuzpq_f64_x4(svfloat64x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svuzpq,_f64_x4)(zn);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c
index a29c347e3197f..787b7d0b3ea1a 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c
@@ -19,27 +19,13 @@
// CHECK-LABEL: @test_svzip_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svzip_s8_x210svint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_s8_x2)(zn);
@@ -47,27 +33,13 @@ svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_u8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svzip_u8_x211svuint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_u8_x2)(zn);
@@ -77,27 +49,13 @@ svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_s16_x211svint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_s16_x2)(zn);
@@ -105,27 +63,13 @@ svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_u16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_u16_x212svuint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_u16_x2)(zn);
@@ -133,27 +77,13 @@ svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_f16_x213svfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_f16_x2)(zn);
@@ -161,27 +91,13 @@ svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_bf16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x214svbfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_bf16_x2)(zn);
@@ -191,27 +107,13 @@ svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_s32_x211svint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_s32_x2)(zn);
@@ -219,27 +121,13 @@ svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_u32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_u32_x212svuint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_u32_x2)(zn);
@@ -247,27 +135,13 @@ svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_f32_x213svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_f32_x2)(zn);
@@ -277,27 +151,13 @@ svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_s64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_s64_x211svint64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_s64_x2)(zn);
@@ -305,27 +165,13 @@ svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_u64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_u64_x212svuint64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_u64_x2)(zn);
@@ -333,27 +179,13 @@ svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzip_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzip_f64_x213svfloat64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzip,_f64_x2)(zn);
@@ -363,27 +195,13 @@ svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzipq_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x210svint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzipq,_s8_x2)(zn);
@@ -391,27 +209,13 @@ svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzipq_u8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x211svuint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzipq,_u8_x2)(zn);
@@ -419,27 +223,13 @@ svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzipq_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x211svint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzipq,_s16_x2)(zn);
@@ -447,27 +237,13 @@ svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzipq_u16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x212svuint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzipq,_u16_x2)(zn);
@@ -475,27 +251,13 @@ svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzipq_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x213svfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svzipq,_f16_x2)(zn);
@@ -503,27 +265,13 @@ svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svzipq_bf16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL:
@_Z19test_svzipq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x2)(zn); @@ -531,27 +279,13 @@ svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x2)(zn); @@ -559,27 +293,13 @@ svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// 
CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x2)(zn); @@ -587,27 +307,13 @@ svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x2)(zn); @@ -615,27 +321,13 @@ svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 
16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x2)(zn); @@ -643,27 +335,13 @@ svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x2)(zn); @@ -671,27 +349,13 @@ svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = 
load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzipq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c index be40ecb4bcaa3..9bea471bc9837 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
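What the removed lines were doing: the ACLE multi-vector intrinsics return a literal struct of scalable vectors, and clang previously spilled that struct to an alloca and reloaded it before returning. A minimal usage sketch, assuming an SME2-enabled toolchain and the <arm_sme.h> ACLE header (the wrapper name `zip_pair` is hypothetical; the intrinsic call mirrors the test functions above):

```c
#include <arm_sme.h>

// Interleave the two vectors of an svint8x2_t tuple. With the updated
// tests, the IR for this body is a single call to
// @llvm.aarch64.sve.zip.x2.nxv16i8 whose two-vector struct result is
// returned directly, with no alloca/store/load of the tuple.
svint8x2_t zip_pair(svint8x2_t zn) __arm_streaming {
  return svzip_s8_x2(zn);
}
```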
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c
index be40ecb4bcaa3..9bea471bc9837 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c
@@ -19,35 +19,13 @@
 
 // CHECK-LABEL: @test_svzip_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zip.x4.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_s8_x4)(zn);

The CPP-CHECK block of this hunk (label @_Z16test_svzip_s8_x410svint8x4_t) is simplified in exactly the same way, and the hunks for test_svzip_{u8,s16,u16,f16,bf16,s32,u32,f32,s64,u64,f64}_x4 repeat the pattern with the matching @llvm.aarch64.sve.zip.x4.* intrinsic, element type, and insertion offsets (steps of 16 for i8, 8 for 16-bit, 4 for 32-bit, and 2 for 64-bit elements): in every case the four extractvalue/@llvm.vector.insert pairs, the store, and the reload are removed in favor of returning [[TMP0]] directly.
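The four-vector forms benefit the same way once the tuple is consumed. A short sketch under the same assumptions (the helper name `zip_quad_first` is hypothetical; `svzip_f32_x4` mirrors the tests above, while `svget4_f32` is the standard ACLE tuple accessor, used here only for illustration):

```c
#include <arm_sme.h>

// Zip four vectors, then read one vector out of the resulting tuple.
// Since the zipped struct is now produced and returned without a stack
// round-trip, the element extraction feeds directly off the
// @llvm.aarch64.sve.zip.x4.nxv4f32 call.
svfloat32_t zip_quad_first(svfloat32x4_t zn) __arm_streaming {
  svfloat32x4_t zipped = svzip_f32_x4(zn);
  return svget4_f32(zipped, 0);
}
```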
@@ -459,35 +195,13 @@ svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svzipq_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zipq.x4.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzipq,_s8_x4)(zn);

Its CPP-CHECK block (label @_Z17test_svzipq_s8_x410svint8x4_t) is simplified identically, as are the hunks for test_svzipq_{u8,s16,u16,f16,bf16}_x4. The hunk for test_svzipq_s32_x4 continues below:

@@ -675,35 +279,13 @@ svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svzipq_s32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.zipq.x4.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
-// CHECK-NEXT:
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x4)(zn); @@ -711,35 +293,13 @@ svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , 
, } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x4)(zn); @@ -747,35 +307,13 @@ svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x4)(zn); @@ -783,35 +321,13 @@ svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) 
__arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x4)(zn); @@ -819,35 +335,13 @@ svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x4)(zn); @@ -855,35 +349,13 @@ svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svzipq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c index 77b02b4c4708f..7fa2249827c4e 100644 --- a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c +++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c @@ -11,28 +11,14 @@ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -42,28 +28,14 @@ svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -73,28 +45,14 @@ svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -104,28 +62,14 @@ svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -135,28 +79,14 @@ svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming 
__arm_inout("za") { @@ -166,28 +96,14 @@ svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_hor_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -198,28 +114,14 @@ svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -229,28 +131,14 @@ svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -260,28 +148,14 @@ svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -291,28 +165,14 @@ svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -322,28 +182,14 @@ svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } 
[[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -353,28 +199,14 @@ svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -389,28 +221,14 @@ svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 
0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_ver_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -420,28 +238,14 @@ svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_ver_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -451,28 +255,14 @@ svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -482,28 +272,14 @@ svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t 
test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {

[Hunks @@ -513,28 +289,14 @@ through @@ -1510,36 +756,14 @@ of this auto-generated test file are collapsed here: the scalable-vector types inside their FileCheck lines (e.g. <vscale x 8 x half>) did not survive extraction. Every one of these hunks applies the same regenerated-check update to both the CHECK and the CPP-CHECK block of its test function: the lines that alloca a RETVAL slot, extractvalue each element of the @llvm.aarch64.sme.readz.{horiz,vert}.x{2,4}.* result, repack the elements into one wide vector with @llvm.vector.insert, store it, and load the struct back are removed, and a single line returning the intrinsic's struct result directly is added. With the types reconstructed, the CHECK hunk for test_svreadz_ver_za16_f16_x2 reads:

 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]

The collapsed hunks cover, in order: test_svreadz_ver_za16_{f16,bf16}_x2; test_svreadz_ver_za32_{s32,u32,f32}_x2; test_svreadz_ver_za64_{s64,u64,f64}_x2; test_svreadz_hor_za8_{s8,u8}_x4; test_svreadz_hor_za16_{s16,u16,f16,bf16}_x4; test_svreadz_hor_za32_{s32,u32,f32}_x4; test_svreadz_hor_za64_{s64,u64,f64}_x4; test_svreadz_ver_za8_{s8,u8}_x4; test_svreadz_ver_za16_{s16,u16,f16,bf16}_x4; and test_svreadz_ver_za32_{s32,u32}_x4. The x4 variants drop four extractvalue/@llvm.vector.insert pairs instead of two; the readz intrinsic call, its ZA tile immediate, and the slice operand are identical in the old and new checks.]
CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1549,36 +773,14 @@ svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], 
align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1588,36 +790,14 @@ svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
[[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1627,36 +807,14 @@ svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1666,36 +824,14 @@ svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } 
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_f64_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat64x4_t test_svreadz_ver_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2120,28 +1256,14 @@ svfloat64_t test_svreadz_hor_za128_f64(uint32_t slice) __arm_streaming __arm_ino
// CHECK-LABEL: define dso_local { , } @test_svreadz_za8_s8_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svreadz_za8_s8_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2151,28 +1273,14 @@ svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("z
// CHECK-LABEL: define dso_local { , } @test_svreadz_za8_u8_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svreadz_za8_u8_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2182,28 +1290,14 @@ svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("
// CHECK-LABEL: define dso_local { , } @test_svreadz_za16_s16_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_s16_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2213,28 +1307,14 @@ svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout
// CHECK-LABEL: define dso_local { , } @test_svreadz_za16_u16_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_u16_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2244,28 +1324,14 @@ svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inou
// CHECK-LABEL: define dso_local { , } @test_svreadz_za32_s32_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_s32_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2275,28 +1341,14 @@ svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout
// CHECK-LABEL: define dso_local { , } @test_svreadz_za32_u32_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_u32_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2306,28 +1358,14 @@ svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inou
// CHECK-LABEL: define dso_local { , } @test_svreadz_za64_s64_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_s64_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2337,28 +1375,14 @@ svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout
// CHECK-LABEL: define dso_local { , } @test_svreadz_za64_u64_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_u64_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
__arm_inout("za") { @@ -2368,28 +1392,14 @@ svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z25test_svreadz_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2399,28 +1409,14 @@ svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// 
CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2430,28 +1426,14 @@ svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2461,28 +1443,14 @@ svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2496,36 +1464,14 @@ svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2535,36 +1481,14 @@ svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("z // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] 
= tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2574,36 +1498,14 @@ svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout(" // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 
16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2613,36 +1515,14 @@ svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] 
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2652,36 +1532,14 @@ svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inou
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_s32_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_s32_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2691,36 +1549,14 @@ svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_u32_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_u32_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2730,36 +1566,14 @@ svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inou
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_s64_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_s64_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2769,36 +1583,14 @@ svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_u64_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_u64_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2808,36 +1600,14 @@ svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inou
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_bf16_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svreadz_za16_bf16_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2847,36 +1617,14 @@ svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_i
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_f16_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_f16_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2886,36 +1634,14 @@ svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_ino
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_f32_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_f32_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -2925,36 +1651,14 @@ svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_ino
// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_f64_x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_f64_x4j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-//
CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c index 6cea34ee52ef6..deb126236ad57 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -141,27 +141,13 @@ svbool_t test_svpext_lane_c64_3(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 0); @@ -169,27 +155,13 @@ svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// 
CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 1); @@ -197,31 +169,25 @@ svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 0); @@ -229,31 +195,25 @@ svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { // CHECK-LABEL: 
@test_svpext_lane_c16_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 1); @@ -261,31 +221,25 @@ svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: 
@_Z25test_svpext_lane_c32_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR { return svpext_lane_c32_x2(c, 0); @@ -293,31 +247,25 @@ svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } 
[[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR { return svpext_lane_c32_x2(c, 1); @@ -325,31 +273,25 @@ svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c64_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR { return svpext_lane_c64_x2(c, 0); @@ -357,31 +299,25 @@ svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c64_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr 
[[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c64_x2_1(svcount_t c) ATTR { return svpext_lane_c64_x2(c, 1); diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c index 3fcc1dc6c819a..612f2d25d40d0 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c @@ -23,28 +23,14 @@ // CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_s64,_x2)(op1, op2); @@ -53,28 +39,14 @@ svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_u64,_x2)(op1, op2); @@ -83,32 +55,26 @@ svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_s64ll( // 
CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_s64,_x2)(op1, op2); @@ -117,32 +83,26 @@ svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// 
CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_u64,_x2)(op1, op2); @@ -151,32 +111,26 @@ svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_s64,_x2)(op1, op2); @@ -185,32 +139,26 @@ svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_u64,_x2)(op1, op2); @@ -219,32 +167,26 @@ svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 
noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2); @@ -253,32 +195,26 @@ svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 
2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2); @@ -287,28 +223,14 @@ svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2); @@ -317,28 +239,14 @@ svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2); @@ -347,32 +255,26 @@ svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_s64,_x2)(op1, op2); @@ -381,32 +283,26 @@ svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } 
@test_svwhilegt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_u64,_x2)(op1, op2); @@ -415,32 +311,26 @@ svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store 
[[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_s64,_x2)(op1, op2); @@ -449,32 +339,26 @@ svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_u64,_x2)(op1, op2); @@ -483,32 +367,26 @@ svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_s64,_x2)(op1, op2); @@ -517,32 +395,26 @@ svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_u64( // CHECK-SAME: i64 noundef 
[[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_u64,_x2)(op1, op2); @@ -551,28 +423,14 @@ svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef 
[[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_s64,_x2)(op1, op2); @@ -581,28 +439,14 @@ svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_u64,_x2)(op1, op2); @@ -611,32 +455,26 @@ svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], 
i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_s64,_x2)(op1, op2); @@ -645,32 +483,26 @@ svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_u64,_x2)(op1, op2); @@ -679,32 +511,26 @@ svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } 
[[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_s64,_x2)(op1, op2); @@ -713,32 +539,26 @@ svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_u64,_x2)(op1, op2); @@ -747,32 +567,26 @@ svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = 
insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_s64,_x2)(op1, op2); @@ -781,32 +595,26 @@ svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 
[[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_u64,_x2)(op1, op2); @@ -815,28 +623,14 @@ svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_s64,_x2)(op1, op2); @@ -845,28 +639,14 @@ svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_u64,_x2)(op1, op2); @@ -875,32 +655,26 @@ svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_s64,_x2)(op1, op2); @@ -909,32 +683,26 @@ svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_u64,_x2)(op1, op2); @@ -943,32 +711,26 @@ svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2)(op1, op2); @@ -977,32 +739,26 @@ svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], 
[[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_u64,_x2)(op1, op2); @@ -1011,32 +767,26 @@ svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( 
<vscale x 2 x i1> [[TMP4]])
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], <vscale x 16 x i1> [[TMP5]], 1
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP6]]
//
svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR {
  return SVE_ACLE_FUNC(svwhilelt_b64,_s64,_x2)(op1, op2);
@@ -1045,32 +795,26 @@ svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR {
// CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svwhilelt_b64_u64(
// CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
-// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
+// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], <vscale x 16 x i1> [[TMP5]], 1
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP6]]
//
// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z22test_svwhilelt_b64_u64mm(
// CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0
// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], <vscale x 16 x i1> [[TMP5]], 1
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP6]]
//
svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) ATTR {
  return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2);
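For readers tracing the SVE checks above, a minimal hand-written usage sketch (not part of the tests; it assumes an SVE2.1-enabled target and <arm_sve.h>, and expands SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2) to its concrete name) of the paired while-intrinsic form whose lowering changed:

#include <arm_sve.h>
#include <stdint.h>
// Hand-written sketch: svwhilelt_b32_s64_x2 yields two predicate vectors in
// one call; with this patch Clang builds the returned svboolx2_t aggregate
// with insertvalue instead of staging it through a stack slot.
svboolx2_t first_lanes_x2(int64_t i, int64_t n) {
  return svwhilelt_b32_s64_x2(i, n);
}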
diff --git a/clang/test/CodeGen/bpf-attr-type-tag-atomic.c b/clang/test/CodeGen/bpf-attr-type-tag-atomic.c
new file mode 100644
index 0000000000000..a10a45dc0808d
--- /dev/null
+++ b/clang/test/CodeGen/bpf-attr-type-tag-atomic.c
@@ -0,0 +1,16 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes -debug-info-kind=limited %s -o - | FileCheck %s
+
+#define __tag1 __attribute__((btf_type_tag("tag1")))
+int _Atomic __tag1 *g1;
+volatile int _Atomic __tag1 *g2;
+
+// CHECK: distinct !DIGlobalVariable(name: "g1", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[PTR1:[0-9]+]]
+// CHECK: distinct !DIGlobalVariable(name: "g2", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[PTR2:[0-9]+]]
+// CHECK: ![[PTR2]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[BASE2:[0-9]+]], size: [[#]], annotations: ![[ANNOT:[0-9]+]])
+// CHECK: ![[BASE2]] = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: ![[BASE1:[0-9]+]])
+// CHECK: ![[BASE1]] = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: ![[BASIC:[0-9]+]])
+// CHECK: ![[BASIC]] = !DIBasicType(name: "int", size: [[#]], encoding: DW_ATE_signed)
+// CHECK: ![[ANNOT]] = !{![[ENTRY:[0-9]+]]}
+// CHECK: ![[ENTRY]] = !{!"btf_type_tag", !"tag1"}
+// CHECK: ![[PTR1]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[BASE1]], size: [[#]], annotations: ![[ANNOT]])
diff --git a/clang/test/CodeGen/inline-asm-output-variant.c b/clang/test/CodeGen/inline-asm-output-variant.c
new file mode 100644
index 0000000000000..376a876754034
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-output-variant.c
@@ -0,0 +1,26 @@
+// REQUIRES: x86-registered-target
+/// AT&T input
+// RUN: %clang_cc1 -triple x86_64 -S --output-asm-variant=0 %s -o - | FileCheck --check-prefix=ATT %s
+// RUN: %clang_cc1 -triple x86_64 -S --output-asm-variant=1 %s -o - | FileCheck --check-prefix=INTEL %s
+
+/// Intel input
+// RUN: %clang_cc1 -triple x86_64 -S -D INTEL -mllvm -x86-asm-syntax=intel -inline-asm=intel %s -o - | FileCheck --check-prefix=INTEL %s
+// RUN: %clang_cc1 -triple x86_64 -S -D INTEL -mllvm -x86-asm-syntax=intel -inline-asm=intel --output-asm-variant=1 %s -o - | FileCheck --check-prefix=INTEL %s
+
+// ATT: movl $1, %eax
+// ATT: movl $2, %eax
+
+// INTEL: mov eax, 1
+// INTEL: mov eax, 2
+
+#ifdef INTEL
+asm("mov eax, 1");
+void foo() {
+  asm("mov eax, 2");
+}
+#else
+asm("mov $1, %eax");
+void foo() {
+  asm("mov $2, %eax");
+}
+#endif
diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
new file mode 100644
index 0000000000000..b94f9641decc8
--- /dev/null
+++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c
@@ -0,0 +1,250 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 5
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s -check-prefixes=CHECK-WIN64
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-I686
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-PPC
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s -check-prefixes=CHECK-ARM
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s -check-prefixes=CHECK-ARM-HF
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s -check-prefixes=CHECK-THUMB
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple aarch64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-AARCH
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple spir -o - | FileCheck %s -check-prefixes=CHECK-SPIR
+// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-w64-mingw32 -o - | FileCheck %s -check-prefixes=CHECK-MINGW32
+
+// This file checks that when arguments/results are passed indirectly (i.e. via
+// pointers), the "int" TBAA metadata is not set on the FP libcall, since that
+// would let the optimizer incorrectly remove the stores that set up the call.
+
+long double powl(long double a, long double b);
+
+// CHECK-LABEL: define dso_local x86_fp80 @test_powl(
+// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+//
+// CHECK-WIN64-LABEL: define dso_local x86_fp80 @test_powl(
+// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+//
+// CHECK-I686-LABEL: define dso_local x86_fp80 @test_powl(
+// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+//
+// CHECK-PPC-LABEL: define dso_local ppc_fp128 @test_powl(
+// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]], ppc_fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+//
+// CHECK-ARM-LABEL: define dso_local double @test_powl(
+// CHECK-ARM-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+//
+// CHECK-ARM-HF-LABEL: define dso_local double @test_powl(
+// CHECK-ARM-HF-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+//
+// CHECK-THUMB-LABEL: define double @test_powl(
+// CHECK-THUMB-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]]
+//
+// CHECK-AARCH-LABEL: define dso_local fp128 @test_powl(
+// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]], fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+//
+// CHECK-SPIR-LABEL: define dso_local spir_func double @test_powl(
+// CHECK-SPIR-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]]
+//
+// CHECK-MINGW32-LABEL: define dso_local void @test_powl(
+// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(x86_fp80) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]], ptr nocapture noundef readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[TBAA3]]
+// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]]
+// CHECK-MINGW32: store x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[TBAA3]]
+// CHECK-MINGW32: call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]], ptr noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3:[0-9]+]]
+// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]]
+// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA3]]
+//
+long double test_powl(long double a, long double b) {
+  return powl(a, b);
+}
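To make the MinGW expectations concrete: on x86_64-w64-mingw32 an x86_fp80 argument is passed by pointer, so a caller such as the hand-written sketch below must first store each operand to a temporary and pass its address; if the call site carried "int" TBAA, those x86_fp80 stores would look dead to the optimizer.

// Hand-written sketch (not from the test): both powl operands are spilled to
// byval temporaries on MinGW x86_64, matching the CHECK-MINGW32 lines above.
long double scaled_pow(long double x) {
  return powl(x, 2.5L);
}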
+
+// CHECK-LABEL: define dso_local { x86_fp80, x86_fp80 } @test_cargl(
+// CHECK-SAME: ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16
+// CHECK: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16
+// CHECK: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16
+// CHECK: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16
+// CHECK: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]]
+//
+// CHECK-WIN64-LABEL: define dso_local { x86_fp80, x86_fp80 } @test_cargl(
+// CHECK-WIN64-SAME: ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-WIN64: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16
+// CHECK-WIN64: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16
+// CHECK-WIN64: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16
+// CHECK-WIN64: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16
+// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]]
+//
+// CHECK-I686-LABEL: define dso_local void @test_cargl(
+// CHECK-I686-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 4 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-I686: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 4
+// CHECK-I686: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 4
+// CHECK-I686: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 4
+// CHECK-I686: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 4
+// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 4 [[BYVAL_TEMP]]) #[[ATTR5]]
+// CHECK-I686: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 4
+// CHECK-I686: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 4
+//
+// CHECK-PPC-LABEL: define dso_local void @test_cargl(
+// CHECK-PPC-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ ppc_fp128, ppc_fp128 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ ppc_fp128, ppc_fp128 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-PPC: [[CLD_REAL:%.*]] = load ppc_fp128, ptr [[CLD]], align 16
+// CHECK-PPC: [[CLD_IMAG:%.*]] = load ppc_fp128, ptr [[CLD_IMAGP:%.*]], align 16
+// CHECK-PPC: store ppc_fp128 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16
+// CHECK-PPC: store ppc_fp128 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16
+// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @cargl(ptr noundef nonnull byval({ ppc_fp128, ppc_fp128 }) align 16 [[BYVAL_TEMP]]) #[[ATTR3]]
+// CHECK-PPC: store ppc_fp128 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16
+// CHECK-PPC: store ppc_fp128 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16
+//
+// CHECK-ARM-LABEL: define dso_local void @test_cargl(
+// CHECK-ARM-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]]
+// CHECK-ARM: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8
+// CHECK-ARM: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8
+//
+// CHECK-ARM-HF-LABEL: define dso_local { double, double } @test_cargl(
+// CHECK-ARM-HF-SAME: { double, double } noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]]
+//
+// CHECK-THUMB-LABEL: define { double, double } @test_cargl(
+// CHECK-THUMB-SAME: [2 x double] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]]
+//
+// CHECK-AARCH-LABEL: define dso_local { fp128, fp128 } @test_cargl(
+// CHECK-AARCH-SAME: [2 x fp128] noundef alignstack(16) [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA2]]
+//
+// CHECK-SPIR-LABEL: define dso_local spir_func void @test_cargl(
+// CHECK-SPIR-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ double, double }) align 8 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-SPIR: [[CLD_REAL:%.*]] = load double, ptr [[CLD]], align 8
+// CHECK-SPIR: [[CLD_IMAG:%.*]] = load double, ptr [[CLD_IMAGP:%.*]], align 8
+// CHECK-SPIR: store double [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 8
+// CHECK-SPIR: store double [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 8
+// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @cargl(ptr noundef nonnull byval({ double, double }) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
+// CHECK-SPIR: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8
+// CHECK-SPIR: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8
+//
+// CHECK-MINGW32-LABEL: define dso_local void @test_cargl(
+// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[CLD:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-MINGW32: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16
+// CHECK-MINGW32: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16
+// CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16
+// CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16
+// CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]]
+// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]]
+// CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16
+// CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load x86_fp80, ptr [[CLD_IMAGP]], align 16
+// CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16
+// CHECK-MINGW32: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16
+//
+_Complex long double test_cargl(_Complex long double cld) {
+  long double v2 = __builtin_cargl(cld);
+  _Complex long double tmp = v2 * cld;
+  return tmp;
+}
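For reference, __builtin_cargl computes the argument (phase angle) of its complex operand; a plain-C equivalent of the builtin call in test_cargl, assuming <complex.h> and <math.h>, is:

#include <complex.h>
#include <math.h>
// Hand-written reference: carg(z) is atan2 of the imaginary and real parts.
long double cargl_equiv(long double _Complex z) {
  return atan2l(cimagl(z), creall(z));
}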
[[BYVAL_TEMP:%.*]], align 16 +// CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load x86_fp80, ptr [[CLD_IMAGP]], align 16 +// CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 +// CHECK-MINGW32: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16 +// +_Complex long double test_cargl(_Complex long double cld) { + long double v2 = __builtin_cargl(cld); + _Complex long double tmp = v2 * cld; + return tmp; +} + + +int ilogbl(long double a); + +// CHECK-LABEL: define dso_local i32 @test_ilogb( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// +// CHECK-WIN64-LABEL: define dso_local i32 @test_ilogb( +// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// +// CHECK-I686-LABEL: define dso_local i32 @test_ilogb( +// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA3]] +// +// CHECK-PPC-LABEL: define dso_local i32 @test_ilogb( +// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// +// CHECK-ARM-LABEL: define dso_local i32 @test_ilogb( +// CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-ARM-HF-LABEL: define dso_local i32 @test_ilogb( +// CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-THUMB-LABEL: define i32 @test_ilogb( +// CHECK-THUMB-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-AARCH-LABEL: define dso_local i32 @test_ilogb( +// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA2]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func i32 @test_ilogb( +// CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// +// CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb( +// CHECK-MINGW32-SAME: ptr nocapture noundef readonly [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: [[CALL:%.*]] = call i32 @ilogbl(ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] +// +int test_ilogb(long double a) { + 
return ilogbl(a); +} +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-WIN64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-WIN64: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-WIN64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-WIN64: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-I686: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-I686: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-I686: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-I686: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-PPC: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-PPC: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-PPC: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-PPC: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-ARM: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-ARM: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-ARM: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-ARM-HF: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM-HF: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-ARM-HF: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-ARM-HF: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-THUMB: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-THUMB: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-THUMB: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-THUMB: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-AARCH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-AARCH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-AARCH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-AARCH: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-SPIR: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-SPIR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-SPIR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-SPIR: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-MINGW32: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-MINGW32: [[META4]] = !{!"long double", [[META5:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META6]] = !{!"Simple C/C++ TBAA"} +//. 
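The hunk above regenerates the long double checks, and the CHECK-MINGW32 lines document an indirect ABI on x86_64 MinGW: x86_fp80 arguments travel through 16-byte-aligned byval temporaries, and the result comes back through a caller-provided sret slot. A rough C rendering of what those checks encode for test_powl; lowered_test_powl and its parameter names are hypothetical illustrations, not part of the test:

#include <math.h>

/* Sketch of the indirect x86_64 MinGW lowering of test_powl: the caller
   copies each x86_fp80 argument into an aligned temporary and hands the
   callee a pointer to a result slot (the sret argument in the checks). */
static void lowered_test_powl(long double *agg_result,
                              const long double *a, const long double *b) {
  long double byval_a = *a; /* "store x86_fp80 [[A]], ptr [[BYVAL_TEMP]]" */
  long double byval_b = *b; /* "store x86_fp80 [[B]], ptr [[BYVAL_TEMP1]]" */
  *agg_result = powl(byval_a, byval_b); /* real IR passes the three pointers */
}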
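The scoped-atomic-ops.c diff just below, and the new OpenCL test at the end of this series, both pin down the same scope mapping: __MEMORY_SCOPE_SYSTEM/DEVICE/WRKGRP/WVFRNT/SINGLE lower to the "one-as"/"agent-one-as"/"workgroup-one-as"/"wavefront-one-as"/"singlethread-one-as" syncscopes on AMDGCN, and to no syncscope/"device"/"workgroup"/"subgroup"/"singlethread" on SPIR-V. A minimal sketch of the builtins those tests exercise (scope_demo is a made-up name; building it with either RUN-line triple should show the scopes in the emitted IR):

/* Each call lowers to an atomic load/store/atomicrmw carrying the
   per-target syncscope listed in the FileCheck lines below. */
int scope_demo(int *p) {
  int v;
  __scoped_atomic_load(p, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);      /* "agent-one-as" / "device" */
  __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);  /* "workgroup-one-as" / "workgroup" */
  __scoped_atomic_store_n(p, v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);    /* "singlethread-one-as" / "singlethread" */
  return v;
}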
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index b0032046639b8..cf98812a07e91 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -1,12 +1,21 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
-// CHECK-LABEL: define hidden i32 @fi1a(
-// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi1a(
+// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV: define hidden spir_func i32 @fi1a(
+// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
int fi1a(int *i) {
int v;
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -17,13 +26,18 @@ int fi1a(int *i) {
return v;
}
-// CHECK-LABEL: define hidden i32 @fi1b(
-// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
-//
+// AMDGCN-LABEL: define hidden i32 @fi1b(
+// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi1b(
+// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
int fi1b(int *i) {
*i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
*i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
@@ -33,13 +47,18 @@ int fi1b(int *i) {
return *i;
}
-// CHECK-LABEL: define hidden void @fi2a(
-// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
-//
+// AMDGCN-LABEL: define hidden void @fi2a(
+// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi2a(
+// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4
+// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
void fi2a(int *i) {
int v = 1;
__scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -49,12 +68,18 @@ void fi2a(int *i) {
__scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden void @fi2b(
-// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi2b(
+// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi2b(
+// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4
+// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
void fi2b(int *i) {
__scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
__scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
@@ -63,15 +88,24 @@ void fi2b(int *i) {
__scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden void @fi3a(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3a(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3a(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] monotonic, align 4
void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -83,15 +117,24 @@ void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden void @fi3b(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3b(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3b(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("device") monotonic, align 4
void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
@@ -103,15 +146,24 @@ void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden void @fi3c(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3c(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3c(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup") monotonic, align 4
void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
@@ -123,15 +175,24 @@ void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden void @fi3d(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3d(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3d(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("subgroup") monotonic, align 4
void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
@@ -143,15 +204,24 @@ void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden void @fi3e(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3e(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3e(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread") monotonic, align 4
void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
@@ -163,8 +233,10 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4a(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4a(
+// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a(
+// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4
_Bool fi4a(int *i) {
int cmp = 0;
int desired = 1;
@@ -173,8 +245,10 @@ _Bool fi4a(int *i) {
__MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4b(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4b(
+// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4b(
+// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4
_Bool fi4b(int *i) {
int cmp = 0;
int desired = 1;
@@ -183,8 +257,10 @@ _Bool fi4b(int *i) {
__MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4c(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4c(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4c(
+// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4
_Bool fi4c(int *i) {
int cmp = 0;
int desired = 1;
@@ -193,8 +269,10 @@ _Bool fi4c(int *i) {
__MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4d(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4d(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4d(
+// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4
_Bool fi4d(int *i) {
int cmp = 0;
int desired = 1;
@@ -203,8 +281,10 @@ _Bool fi4d(int *i) {
__MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4e(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4e(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4e(
+// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4
_Bool fi4e(int *i) {
int cmp = 0;
int desired = 1;
@@ -213,8 +293,10 @@ _Bool fi4e(int *i) {
__MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5a(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5a(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4
_Bool fi5a(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE,
@@ -222,8 +304,10 @@ _Bool fi5a(int *i) {
__MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5b(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5b(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5b(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4
_Bool fi5b(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE,
@@ -231,101 +315,127 @@ _Bool fi5b(int *i) {
__MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5c(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5c(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5c(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4
_Bool fi5c(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(
i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5d(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5d(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5d(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4
_Bool fi5d(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(
i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5e(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5e(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5e(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4
_Bool fi5e(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(
i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden i32 @fi6a(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6a(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6a(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4
int fi6a(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6b(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6b(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6b(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4
int fi6b(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6c(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6c(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6c(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4
int fi6c(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6d(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6d(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6d(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4
int fi6d(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6e(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6e(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6e(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4
int fi6e(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
return ret;
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7a(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7a(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] monotonic, align 1
_Bool fi7a(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7b(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7b(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7b(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("device") monotonic, align 1
_Bool fi7b(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7c(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7c(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7c(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup") monotonic, align 1
_Bool fi7c(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7d(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7d(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7d(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("subgroup") monotonic, align 1
_Bool fi7d(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7e(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7e(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7e(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread") monotonic, align 1
_Bool fi7e(_Bool *c) {
- return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
+ return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_SINGLE);
}
diff --git a/clang/test/CodeGen/ubsan-builtin-checks.c b/clang/test/CodeGen/ubsan-builtin-checks.c
index c7f6078f903ba..8535ec915ac34 100644
--- a/clang/test/CodeGen/ubsan-builtin-checks.c
+++ b/clang/test/CodeGen/ubsan-builtin-checks.c
@@ -51,3 +51,20 @@ void check_clz(int n) {
// CHECK: call void @__ubsan_handle_invalid_builtin
__builtin_clzg((unsigned int)n);
}
+
+// CHECK: define{{.*}} void @check_assume
+void check_assume(int n) {
+ // CHECK: [[TOBOOL:%.*]] = icmp ne i32 [[N:%.*]], 0
+ // CHECK-NEXT: br i1 [[TOBOOL]]
+ //
+ // Handler block:
+ // CHECK: call void @__ubsan_handle_invalid_builtin
+ // CHECK-NEXT: unreachable
+ //
+ // Continuation block:
+ // CHECK: call void @llvm.assume(i1 [[TOBOOL]])
+ __builtin_assume(n);
+
+ // CHECK: call void @__ubsan_handle_invalid_builtin
+ __attribute__((assume(n)));
+}
diff --git a/clang/test/CodeGenCXX/cxx2a-consteval.cpp b/clang/test/CodeGenCXX/cxx2a-consteval.cpp
index a58a09554699d..bfeabc946da41 100644
--- a/clang/test/CodeGenCXX/cxx2a-consteval.cpp
+++ b/clang/test/CodeGenCXX/cxx2a-consteval.cpp
@@ -6,6 +6,14 @@
// RUN: %clang_cc1 -emit-llvm %s -Dconsteval="" -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll
// RUN: FileCheck -check-prefix=EXPR -input-file=%t.ll %s
+// RUN: %clang_cc1 -emit-llvm %s -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll -fexperimental-new-constant-interpreter
+// RUN: FileCheck -check-prefix=EVAL -input-file=%t.ll %s
+// RUN: FileCheck -check-prefix=EVAL-STATIC -input-file=%t.ll %s
+// RUN: FileCheck -check-prefix=EVAL-FN -input-file=%t.ll %s
+//
+// RUN: %clang_cc1 -emit-llvm %s -Dconsteval="" -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll -fexperimental-new-constant-interpreter
+// RUN: FileCheck -check-prefix=EXPR -input-file=%t.ll %s
+
// there is two version of symbol checks to ensure
// that the symbol we are looking for are correct
// EVAL-NOT: @__cxx_global_var_init()
diff --git a/clang/test/CodeGenCXX/mangle-concept.cpp b/clang/test/CodeGenCXX/mangle-concept.cpp
index e9c46d87635ab..91dc1b0e688e0 100644
--- a/clang/test/CodeGenCXX/mangle-concept.cpp
+++ b/clang/test/CodeGenCXX/mangle-concept.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=latest | FileCheck %s
-// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=16 | FileCheck %s --check-prefix=CLANG16
+// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=17 | FileCheck %s --check-prefix=CLANG17
// expected-no-diagnostics
namespace test1 {
@@ -8,7 +8,7 @@ template concept C = true;
template S> f0() { return S>{}; }
template S> f0<>();
// CHECK: @_ZN5test12f0IiEENS_1SIX1CIT_EEEEv(
-// CLANG16: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv(
+// CLANG17: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv(
}
template struct S {};
@@ -18,12 +18,12 @@ template concept D = true;
template S> f0a() { return S>{}; }
template S> f0a<>();
// CHECK: @_Z3f0aIiE1SIXsr5test1E1CIT_EEEv(
-// CLANG16: @_Z3f0aIiE1SIL_ZN5test11CIT_EEEEv(
+// CLANG17: @_Z3f0aIiE1SIL_ZN5test11CIT_EEEEv(
template S> f0() { return S>{}; }
template S> f0<>();
// CHECK: @_Z2f0IiE1SIX1CIT_EEEv(
-// CLANG16: @_Z2f0IiE1SIL_Z1CIT_EEEv(
+// CLANG17: @_Z2f0IiE1SIL_Z1CIT_EEEv(
template concept True = true;
@@ -56,25 +56,25 @@ namespace test2 {
// CHECK-LABEL: define {{.*}}@{{.*}}test2{{.*}}use
void use() {
// CHECK: call {{.*}}@_ZN5test21AIiEF1fEzQ4TrueIT_E(
- // CLANG16: call {{.*}}@_ZN5test21fEz(
+ // CLANG17: call {{.*}}@_ZN5test21fEz(
f(ai);
// CHECK: call {{.*}}@_ZN5test2F1gIvEEvzQaa4TrueIT_E4TrueITL0__E(
- // CLANG16: call {{.*}}@_ZN5test21gIvEEvz(
+ // CLANG17: call {{.*}}@_ZN5test21gIvEEvz(
g(ai);
// CHECK: call {{.*}}@_ZN5test21hIvEEvzQ4TrueITL0__E(
- // CLANG16: call {{.*}}@_ZN5test21hIvEEvz(
+ // CLANG17: call {{.*}}@_ZN5test21hIvEEvz(
h(ai);
// CHECK: call {{.*}}@_ZN5test2F1iIvQaa4TrueIT_E4TrueITL0__EEEvz(
- // CLANG16: call {{.*}}@_ZN5test21iIvEEvz(
+ // CLANG17: call {{.*}}@_ZN5test21iIvEEvz(
i(ai);
// CHECK: call {{.*}}@_ZN5test21jIvQ4TrueITL0__EEEvz(
- // CLANG16: call {{.*}}@_ZN5test21jIvEEvz(
+ // CLANG17: call {{.*}}@_ZN5test21jIvEEvz(
j(ai);
// CHECK: call {{.*}}@_ZN5test2F1kITk4TruevQ4TrueIT_EEEvz(
- // CLANG16: call {{.*}}@_ZN5test21kIvEEvz(
+ // CLANG17: call {{.*}}@_ZN5test21kIvEEvz(
k(ai);
// CHECK: call {{.*}}@_ZN5test21lITk4TruevEEvz(
- // CLANG16: call {{.*}}@_ZN5test21lIvEEvz(
+ // CLANG17: call {{.*}}@_ZN5test21lIvEEvz(
l(ai);
}
}
@@ -84,38 +84,38 @@ namespace test3 {
template void d() {}
template void d<0>();
// CHECK: define {{.*}}@_ZN5test31dITnDaLi0EEEvv(
- // CLANG16: define {{.*}}@_ZN5test31dILi0EEEvv(
+ // CLANG17: define {{.*}}@_ZN5test31dILi0EEEvv(
template void e() {}
template void e<0>();
// CHECK: define {{.*}}@_ZN5test31eITnDcLi0EEEvv(
- // CLANG16: define {{.*}}@_ZN5test31eILi0EEEvv(
+ // CLANG17: define {{.*}}@_ZN5test31eILi0EEEvv(
// Constrained auto.
template void f() {}
template void f<0>();
// CHECK: define {{.*}}@_ZN5test31fITnDk1CLi0EEEvv(
- // CLANG16: define {{.*}}@_ZN5test31fILi0EEEvv(
+ // CLANG17: define {{.*}}@_ZN5test31fILi0EEEvv(
template auto> void g() {}
template void g<0>();
// CHECK: define {{.*}}@_ZN5test31gITnDk1DIiELi0EEEvv(
- // CLANG16: define {{.*}}@_ZN5test31gILi0EEEvv(
+ // CLANG17: define {{.*}}@_ZN5test31gILi0EEEvv(
template auto> void h() {}
template void h();
// CHECK: define {{.*}}@_ZN5test31hIiTnDk1DIT_ELi0EEEvv(
- // CLANG16: define {{.*}}@_ZN5test31hIiLi0EEEvv(
+ // CLANG17: define {{.*}}@_ZN5test31hIiLi0EEEvv(
template void i(decltype(new C auto(T()))) {}
template void i(int*);
// CHECK: define {{.*}}@_ZN5test31iIiEEvDTnw_Dk1CpicvT__EEE(
- // CLANG16: define {{.*}}@_ZN5test31iIiEEvDTnw_DapicvT__EEE(
+ // CLANG17: define {{.*}}@_ZN5test31iIiEEvDTnw_DapicvT__EEE(
template void j(decltype(new C decltype(auto)(T()))) {}
template void j(int*);
// CHECK: define {{.*}}@_ZN5test31jIiEEvDTnw_DK1CpicvT__EEE(
- // CLANG16: define {{.*}}@_ZN5test31jIiEEvDTnw_DcpicvT__EEE(
+ // CLANG17: define {{.*}}@_ZN5test31jIiEEvDTnw_DcpicvT__EEE(
}
namespace test4 {
@@ -123,12 +123,12 @@
template void f() {}
template void f();
// CHECK: define {{.*}}@_ZN5test41fITk1CiEEvv(
- // CLANG16: define {{.*}}@_ZN5test41fIiEEvv(
+ // CLANG17: define {{.*}}@_ZN5test41fIiEEvv(
template> void g() {}
template void g();
// CHECK: define {{.*}}@_ZN5test41gITk1DIiEiEEvv(
- // CLANG16: define {{.*}}@_ZN5test41gIiEEvv(
+ // CLANG17: define {{.*}}@_ZN5test41gIiEEvv(
}
namespace test5 {
@@ -175,18 +175,18 @@
template typename> void p() {}
// CHECK: define {{.*}}@_ZN5test51pINS_1AEEEvv(
- // CLANG16: define {{.*}}@_ZN5test51pINS_1AEEEvv(
+ // CLANG17: define {{.*}}@_ZN5test51pINS_1AEEEvv(
template void p();
// CHECK: define {{.*}}@_ZN5test51pITtTpTyENS_1BEEEvv(
- // CLANG16: define {{.*}}@_ZN5test51pINS_1BEEEvv(
+ // CLANG17: define {{.*}}@_ZN5test51pINS_1BEEEvv(
template void p();
template typename> void q() {}
// CHECK: define {{.*}}@_ZN5test51qITtTyTyENS_1AEEEvv(
- // CLANG16: define {{.*}}@_ZN5test51qINS_1AEEEvv(
+ // CLANG17: define {{.*}}@_ZN5test51qINS_1AEEEvv(
template void q();
// CHECK: define {{.*}}@_ZN5test51qINS_1BEEEvv(
- // CLANG16: define {{.*}}@_ZN5test51qINS_1BEEEvv(
+ // CLANG17: define {{.*}}@_ZN5test51qINS_1BEEEvv(
template void q();
}
@@ -194,13 +194,13 @@ namespace test6 {
// Abbreviated function templates.
void f(C auto) {}
// CHECK: define {{.*}}@_ZN5test61fITk1CiEEvT_(
- // CLANG16: define {{.*}}@_ZN5test61fIiEEvT_(
+ // CLANG17: define {{.*}}@_ZN5test61fIiEEvT_(
template void f(int);
template void g(D auto) {}
// CHECK: define {{.*}}@_ZN5test61gIiTk1DIT_EiEEvT0_(
- // CLANG16: define {{.*}}@_ZN5test61gIiiEEvT0_(
+ // CLANG17: define {{.*}}@_ZN5test61gIiiEEvT0_(
template void g(int);
}
diff --git a/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm b/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm
index 9956348f87ff4..ad5a3e14a81db 100644
--- a/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm
+++ b/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm
@@ -1,9 +1,10 @@
// https://github.com/llvm/llvm-project/issues/59765
// FIXME: Since the signature of the constructors/destructors is
// different in different targets. The current CHECK can't work
-// well when targeting or running on AIX and z/OS.
+// well when targeting AIX and z/OS.
// It would be better to add the corresponding test for other test.
-// UNSUPPORTED: system-zos, system-aix
+// UNSUPPORTED: system-aix
+// UNSUPPORTED: target={{.*}}-zos{{.*}}
//
// RUN: rm -rf %t
// RUN: mkdir %t
diff --git a/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl
new file mode 100644
index 0000000000000..5af2d807b4189
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl
@@ -0,0 +1,235 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O3 -o - -triple=amdgcn-amd-amdhsa \
+// RUN: | FileCheck %s --check-prefix=AMDGCN
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O3 -o - -triple=spirv64-unknown-unknown \
+// RUN: | FileCheck %s --check-prefix=SPIRV
+
+// AMDGCN-LABEL: define dso_local i32 @load(
+// AMDGCN-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = load atomic i32, ptr [[P]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @load(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = load atomic i32, ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int load(int *p) { return __atomic_load_n(p, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local void @store(
+// AMDGCN-SAME: ptr nocapture noundef writeonly [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: store atomic i32 [[X]], ptr [[P]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret void
+//
+// SPIRV-LABEL: define spir_func void @store(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef writeonly [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: store atomic i32 [[X]], ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret void
+//
+void store(int *p, int x) { return __atomic_store_n(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @add(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @add(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw add ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int add(int *p, int x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local float @fadd(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret float [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func float @fadd(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret float [[TMP0]]
+//
+float fadd(float *p, float x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @sub(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @sub(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int sub(int *p, int x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local float @fsub(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret float [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func float @fsub(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret float [[TMP0]]
+//
+float fsub(float *p, float x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @and(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @and(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw and ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int and(int *p, int x) { return __atomic_fetch_and(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @nand(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @nand(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int nand(int *p, int x) { return __atomic_fetch_nand(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @or(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @or(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw or ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int or(int *p, int x) { return __atomic_fetch_or(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @xor(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @xor(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int xor(int *p, int x) { return __atomic_fetch_xor(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local i32 @min(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw min ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret i32 [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func i32 @min(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw min ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4
+// SPIRV-NEXT: ret i32 [[TMP0]]
+//
+int min(int *p, int x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); }
+// AMDGCN-LABEL: define dso_local float @fmin(
+// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4
+// AMDGCN-NEXT: ret float [[TMP0]]
+//
+// SPIRV-LABEL: define spir_func float @fmin(
+// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr
addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fmin(float *p, float x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @max( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw max ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @max( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw max ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int max(int *p, int x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fmax( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fmax( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fmax(float *p, float x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @xchg( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @xchg( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int xchg(int *p, int x) { return __atomic_exchange_n(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local range(i32 0, 2) i32 @cmpxchg( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = cmpxchg ptr [[P]], i32 [[X]], i32 [[Y]] syncscope("agent") seq_cst seq_cst, align 4 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// AMDGCN-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCN-NEXT: ret i32 [[CONV]] +// +// SPIRV-LABEL: define spir_func range(i32 0, 2) i32 @cmpxchg( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = cmpxchg ptr addrspace(4) [[P]], i32 [[X]], i32 [[Y]] syncscope("device") seq_cst seq_cst, align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +int cmpxchg(int *p, int x, int y) { return 
__atomic_compare_exchange(p, &x, &y, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local range(i32 0, 2) i32 @cmpxchg_weak( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = cmpxchg weak ptr [[P]], i32 [[X]], i32 [[Y]] syncscope("agent") seq_cst seq_cst, align 4 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// AMDGCN-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCN-NEXT: ret i32 [[CONV]] +// +// SPIRV-LABEL: define spir_func range(i32 0, 2) i32 @cmpxchg_weak( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = cmpxchg weak ptr addrspace(4) [[P]], i32 [[X]], i32 [[Y]] syncscope("device") seq_cst seq_cst, align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +int cmpxchg_weak(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } diff --git a/clang/test/Driver/arm-execute-only.c b/clang/test/Driver/arm-execute-only.c index a9bf1656fd27e..d654ec364a87f 100644 --- a/clang/test/Driver/arm-execute-only.c +++ b/clang/test/Driver/arm-execute-only.c @@ -19,6 +19,9 @@ // RUN: not %clang -### --target=arm-arm-none-eabi -march=armv8-m.main -mpure-code -mno-movt %s 2>&1 \ // RUN: | FileCheck %s -check-prefix CHECK-PURE-CODE-NO-MOVT +// RUN: echo "-DABC" > %t.cfg +// RUN: not %clang -### --target=arm-arm-none-eabi -march=armv8-m.main -mpure-code -mno-movt --config %t.cfg %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix CHECK-PURE-CODE-NO-MOVT // CHECK-PURE-CODE-NO-MOVT: error: option '-mpure-code' cannot be specified with '-mno-movt' // RUN: not %clang -### --target=arm-arm-none-eabi -march=armv6-m -mexecute-only -fropi %s 2>&1 \ diff --git a/clang/test/Driver/hipstdpar.c b/clang/test/Driver/hipstdpar.c index 2f48bf6b5cf1e..32e040ef70d75 100644 --- a/clang/test/Driver/hipstdpar.c +++ b/clang/test/Driver/hipstdpar.c @@ -1,6 +1,7 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target // REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} // XFAIL: target={{.*}}hexagon{{.*}} // XFAIL: target={{.*}}-scei{{.*}} // XFAIL: target={{.*}}-sie{{.*}} diff --git a/clang/test/Driver/lld-repro.c b/clang/test/Driver/lld-repro.c index 61904c0e6df30..0e6340865b738 100644 --- a/clang/test/Driver/lld-repro.c +++ b/clang/test/Driver/lld-repro.c @@ -1,5 +1,5 @@ // REQUIRES: lld -// UNSUPPORTED: target={{.*-(ps4|ps5)}} +// UNSUPPORTED: target={{.*-(ps4|ps5)}}, target={{.*}}-zos{{.*}} // RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \ // RUN: | sed -e 's/\\/\\\\/g' > %t.rsp diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 312c462f715d5..a39c1ab36b1db 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -35,6 +35,7 @@ // CHECK-NEXT: za64rs 1.0 'Za64rs' (Reservation Set Size of at Most 64 Bytes) // CHECK-NEXT: zaamo 1.0 'Zaamo' (Atomic Memory Operations) // CHECK-NEXT: zabha 1.0 'Zabha' (Byte and Halfword Atomic Memory Operations) +// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap 
Instructions) // CHECK-NEXT: zalrsc 1.0 'Zalrsc' (Load-Reserved/Store-Conditional) // CHECK-NEXT: zama16b 1.0 'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs) // CHECK-NEXT: zawrs 1.0 'Zawrs' (Wait on Reservation Set) @@ -171,7 +172,6 @@ // CHECK-NEXT: Experimental extensions // CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad) // CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack) -// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions) // CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions) // CHECK-NEXT: zvbc32e 0.7 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements) // CHECK-NEXT: zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) diff --git a/clang/test/Driver/ps5-linker.c b/clang/test/Driver/ps5-linker.c index c0cf0b864028c..4ae65963e361a 100644 --- a/clang/test/Driver/ps5-linker.c +++ b/clang/test/Driver/ps5-linker.c @@ -46,3 +46,27 @@ // CHECK-SYSROOT: {{ld(\.exe)?}}" // CHECK-SYSROOT-SAME: "--sysroot=mysdk" + +// Test that "." is always added to library search paths. This is long-standing +// behavior, unique to PlayStation toolchains. + +// RUN: %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-LDOT %s + +// CHECK-LDOT: {{ld(\.exe)?}}" +// CHECK-LDOT-SAME: "-L." + +// Test that /target/lib is added to library search paths, if it +// exists and no --sysroot is specified. + +// RUN: rm -rf %t.dir && mkdir %t.dir +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### --sysroot=%t.dir 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s + +// CHECK-NO-TARGETLIB: {{ld(\.exe)?}}" +// CHECK-NO-TARGETLIB-NOT: "-L{{.*[/\\]}}target/lib" + +// RUN: mkdir -p %t.dir/target/lib +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-TARGETLIB %s + +// CHECK-TARGETLIB: {{ld(\.exe)?}}" +// CHECK-TARGETLIB-SAME: "-L{{.*[/\\]}}target/lib" diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp new file mode 100644 index 0000000000000..80844a58ad825 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp @@ -0,0 +1,76 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h + +//--- Conv.h +#pragma once + +template +_Up __declval(int); + +template +auto declval() noexcept -> decltype(__declval<_Tp>(0)); + +namespace folly { + +template +struct Expected { + template + auto thenOrThrow() -> decltype(declval()) { + return 1; + } +}; + +struct ExpectedHelper { + template + static constexpr Expected return_(T) { + return Expected(); + } + + template + static auto then_(This&&, Fn&&) + -> decltype(T::template return_((declval()(true), 0))) { + return Expected(); + } +}; + +template +inline Expected tryTo() { + Tgt result = 0; + // In build with asserts: + // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): 
Assertion `isa(D) && "declaration not instantiated in this scope"' failed. + // In release build compilation error on the line below inside lambda: + // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized] + ExpectedHelper::then_(Expected(), [&](bool) { return result; }); + return {}; +} + +} // namespace folly + +inline void bar() { + folly::tryTo(); +} +// expected-no-diagnostics + +//--- folly-conv.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- thrift_cpp2_base.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- logger_base.h +#pragma once +import "folly-conv.h"; +import "thrift_cpp2_base.h"; + +inline void foo() { + folly::tryTo(); +} +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp new file mode 100644 index 0000000000000..5b1a904e928a6 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp @@ -0,0 +1,30 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h +// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp + +//--- header.h +template +void f(T) {} + +class A { + virtual ~A(); +}; + +inline A::~A() { + f([](){}); +} + +struct B { + void g() { + f([](){ + [](){}; + }); + } +}; +// expected-no-diagnostics + +//--- main.cpp +import "header.h"; +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp new file mode 100644 index 0000000000000..646ff9f745710 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t +// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// No crash or assertion failure on multiple nested lambdas deserialization. 
+template +void b() { + [] { + []{ + []{ + []{ + []{ + }(); + }(); + }(); + }(); + }(); +} + +void foo() { + b(); +} +#endif diff --git a/clang/test/Misc/cc1as-output-asm-variant.c b/clang/test/Misc/cc1as-output-asm-variant.c new file mode 100644 index 0000000000000..c287c62fc95e4 --- /dev/null +++ b/clang/test/Misc/cc1as-output-asm-variant.c @@ -0,0 +1,8 @@ +// REQUIRES: x86-registered-target +// RUN: %clang -cc1as -triple x86_64 %s -o - | FileCheck %s --check-prefix=ATT +// RUN: %clang -cc1as -triple x86_64 %s --output-asm-variant=1 -o - | FileCheck %s --check-prefix=INTEL + +// ATT: movl $1, %eax +// INTEL: mov eax, 1 + +mov $1, %eax diff --git a/clang/test/Modules/GH109879-1.cpp b/clang/test/Modules/GH109879-1.cpp new file mode 100644 index 0000000000000..72cfb11081e48 --- /dev/null +++ b/clang/test/Modules/GH109879-1.cpp @@ -0,0 +1,25 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -fprebuilt-module-path=%t -emit-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -fsyntax-only -std=c++20 -fprebuilt-module-path=%t -verify %t/C.cpp + +//--- A.cppm +export module A; +export extern "C" void foo(struct Bar); + +//--- B.cppm +module; +import A; +export module B; + +//--- C.cpp +import B; +struct Bar {}; +void test() { + foo(Bar()); + // expected-error@-1 {{declaration of 'foo' must be imported}} + // expected-note@A.cppm:2 {{declaration here is not visible}} +} diff --git a/clang/test/Modules/GH109879-2.cpp b/clang/test/Modules/GH109879-2.cpp new file mode 100644 index 0000000000000..ccec57839898a --- /dev/null +++ b/clang/test/Modules/GH109879-2.cpp @@ -0,0 +1,29 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 %t/B.cppm -fprebuilt-module-path=%t -emit-module-interface -o %t/B.pcm +// RUN: %clang_cc1 -fsyntax-only -std=c++20 -fprebuilt-module-path=%t -verify %t/C.cpp + +//--- foo.h +struct Bar {}; +extern "C" void foo(struct Bar); + +//--- A.cppm +module; +#include "foo.h" +export module A; +export extern "C" using ::foo; +//--- B.cppm +module; +import A; +export module B; + +//--- C.cpp +// expected-no-diagnostics +import B; +#include "foo.h" +void test() { + foo(Bar()); +} diff --git a/clang/test/OpenMP/lit.local.cfg b/clang/test/OpenMP/lit.local.cfg index 58ee923cb7ec5..93adc6734d1a2 100644 --- a/clang/test/OpenMP/lit.local.cfg +++ b/clang/test/OpenMP/lit.local.cfg @@ -1,5 +1,9 @@ # -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +import re from lit.llvm.subst import ToolSubst fc = ToolSubst("FileCheck", unresolved="fatal") config.substitutions.insert(0, (fc.regex, "FileCheck --allow-unused-prefixes")) + +if re.match(r".*-zos", config.target_triple): + config.unsupported = True diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 60675065495bb..05a8534ba13da 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -87,6 +87,7 @@ // CHECK-NOT: __riscv_za64rs {{.*$}} // CHECK-NOT: __riscv_zaamo {{.*$}} // CHECK-NOT: __riscv_zabha {{.*$}} +// CHECK-NOT: __riscv_zacas {{.*$}} // CHECK-NOT: __riscv_zalrsc {{.*$}} // CHECK-NOT: __riscv_zama16b {{.*$}} // CHECK-NOT: __riscv_zawrs {{.*$}} @@ -183,7 +184,6 @@ // CHECK-NOT: __riscv_ssnpm{{.*$}} // CHECK-NOT: __riscv_sspm{{.*$}} // CHECK-NOT: 
__riscv_supm{{.*$}} -// CHECK-NOT: __riscv_zacas {{.*$}} // CHECK-NOT: __riscv_zalasr {{.*$}} // CHECK-NOT: __riscv_zfbfmin {{.*$}} // CHECK-NOT: __riscv_zicfilp {{.*$}} @@ -751,6 +751,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZABHA-EXT %s // CHECK-ZABHA-EXT: __riscv_zabha 1000000{{$}} +// RUN: %clang --target=riscv32 \ +// RUN: -march=rv32ia_zacas1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s +// RUN: %clang --target=riscv64 \ +// RUN: -march=rv64ia_zacas1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s +// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}} + // RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_zalrsc1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZALRSC-EXT %s @@ -1630,14 +1638,6 @@ // CHECK-ZVKT-EXT: __riscv_zvkt 1000000{{$}} // Experimental extensions -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ -// RUN: -march=rv32ia_zacas1p0 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ -// RUN: -march=rv64ia_zacas1p0 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s -// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}} - // RUN: %clang --target=riscv32 -menable-experimental-extensions \ // RUN: -march=rv32i_zalasr0p1 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZALASR-EXT %s diff --git a/clang/test/Sema/fp16vec-sema.c b/clang/test/Sema/fp16vec-sema.c index 80936cd622f7c..89f01c6dcf47b 100644 --- a/clang/test/Sema/fp16vec-sema.c +++ b/clang/test/Sema/fp16vec-sema.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -Wno-unused-value -verify %s +// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -fsyntax-only -Wno-unused-value -verify %s typedef __fp16 half4 __attribute__ ((vector_size (8))); typedef float float4 __attribute__ ((vector_size (16))); diff --git a/clang/test/Sema/scoped-atomic-ops.c b/clang/test/Sema/scoped-atomic-ops.c index 59e638c646664..33044aa256cb0 100644 --- a/clang/test/Sema/scoped-atomic-ops.c +++ b/clang/test/Sema/scoped-atomic-ops.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -x c -triple=amdgcn-amd-amdhsa -verify -fsyntax-only %s // RUN: %clang_cc1 -x c -triple=x86_64-pc-linux-gnu -verify -fsyntax-only %s +// RUN: %clang_cc1 -x c -triple=spirv64-unknown-unknown -verify -fsyntax-only %s int fi1a(int *i) { int v; diff --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp index 69e5395a78a57..c6272a775a28f 100644 --- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp +++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp @@ -158,17 +158,30 @@ auto begin(C &c) -> decltype(c.begin()); template T *begin(T (&array)[N]); +using size_t = decltype(sizeof(0)); + +template +struct initializer_list { + const T* ptr; size_t sz; +}; template struct vector { typedef __gnu_cxx::basic_iterator iterator; iterator begin(); iterator end(); const T *data() const; + vector(); + vector(initializer_list __l); + + template + vector(InputIterator first, InputIterator __last); + T &at(int n); }; template struct basic_string_view { + basic_string_view(); basic_string_view(const T *); const T *begin() const; }; @@ -203,11 +216,21 @@ template struct optional { optional(); optional(const T&); + + template + optional(U&& t); + + template + optional(optional&& __t); + T &operator*() &; T &&operator*() &&; T &value() &; T &&value() &&; }; +template +optional<__decay(T)> make_optional(T&&); + template struct stack { @@ -587,3 
+610,170 @@ std::string_view test2() { return k.value(); // expected-warning {{address of stack memory associated}} } } // namespace GH108272 + +namespace GH100526 { +void test() { + std::vector v1({std::string()}); // expected-warning {{object backing the pointer will be destroyed at the end}} + std::vector v2({ + std::string(), // expected-warning {{object backing the pointer will be destroyed at the end}} + std::string_view() + }); + std::vector v3({ + std::string_view(), + std::string() // expected-warning {{object backing the pointer will be destroyed at the end}} + }); + + std::optional o1 = std::string(); // expected-warning {{object backing the pointer}} + + std::string s; + // This is a tricky use-after-free case; here is what it does: + // 1. make_optional creates a temporary "optional" object + // 2. the temporary object owns the underlying string which is copied from s. + // 3. the o2 object holds a view to the underlying string of the temporary object. + std::optional o2 = std::make_optional(s); // expected-warning {{object backing the pointer}} + std::optional o3 = std::optional(s); // expected-warning {{object backing the pointer}} + std::optional o4 = std::optional(s); + + // FIXME: should work for assignment cases + v1 = {std::string()}; + o1 = std::string(); + + // No warning on copying pointers. + std::vector n1 = {std::string_view()}; + std::optional n2 = {std::string_view()}; + std::optional n3 = std::string_view(); + std::optional n4 = std::make_optional(std::string_view()); + const char* b = ""; + std::optional n5 = std::make_optional(b); + std::optional n6 = std::make_optional("test"); +} + +std::vector test2(int i) { + std::vector t; + if (i) + return t; // this is fine, no dangling + return std::vector(t.begin(), t.end()); +} + +class Foo { + public: + operator std::string_view() const { return ""; } +}; +class [[gsl::Owner]] FooOwner { + public: + operator std::string_view() const { return ""; } +}; +std::optional GetFoo(); +std::optional GetFooOwner(); + +template +struct [[gsl::Owner]] Container1 { + Container1(); +}; +template +struct [[gsl::Owner]] Container2 { + template + Container2(const Container1& C2); +}; + +std::optional test3(int i) { + std::string s; + std::string_view sv; + if (i) + return s; // expected-warning {{address of stack memory associated}} + return sv; // fine + Container2 c1 = Container1(); // no diagnostic as Foo is not an Owner. + Container2 c2 = Container1(); // expected-warning {{object backing the pointer will be destroyed}} + return GetFoo(); // fine, we don't know whether Foo is an owner; be conservative. + return GetFooOwner(); // expected-warning {{returning address of local temporary object}} +} + +std::optional test4(int a) { + return std::make_optional(nullptr); // fine +} + + +template +struct [[gsl::Owner]] StatusOr { + const T &valueLB() const [[clang::lifetimebound]]; + const T &valueNoLB() const; +}; + +template +struct [[gsl::Pointer]] Span { + Span(const std::vector &V); + + const int& getFieldLB() const [[clang::lifetimebound]]; + const int& getFieldNoLB() const; +}; + + +/////// From Owner /////// + +// Pointer from Owner +std::string_view test5() { + std::string_view a = StatusOr().valueLB(); // expected-warning {{object backing the pointer will be dest}} +return StatusOr().valueLB(); // expected-warning {{returning address of local temporary}} + + // No dangling diagnostics on non-lifetimebound methods.
+ std::string_view b = StatusOr().valueNoLB(); + return StatusOr().valueNoLB(); +} + +// Pointer from Owner +// Prevent regression GH108463 +Span test6(std::vector v) { + Span dangling = std::vector(); // expected-warning {{object backing the pointer}} + return v; // expected-warning {{address of stack memory}} +} + +/////// From Owner> /////// + +// Pointer from Owner> +int* test7(StatusOr> aa) { + // No dangling diagnostic on pointer. + return aa.valueLB().valueLB(); // OK. +} + +// Owner from Owner> +std::vector test8(StatusOr> aa) { + return aa.valueLB(); // OK, no pointer is constructed in this case. + return aa.valueNoLB(); +} + +// Pointer from Owner> +Span test9(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory associated}} + return aa.valueNoLB(); // OK. +} + +/////// From Owner /////// + +// Pointer> from Owner +Span test10(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory}} + return aa.valueNoLB(); // OK. +} + +/////// From Owner> /////// + +// Pointer> from Owner> +Span test11(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory}} + return aa.valueNoLB(); // OK. +} + +// Lifetimebound and gsl::Pointer. +const int& test12(Span a) { + return a.getFieldLB(); // expected-warning {{reference to stack memory associated}} + return a.getFieldNoLB(); // OK. +} + +void test13() { + // FIXME: RHS is an Owner; we skip this case to avoid false positives. + std::optional> abc = std::vector{}; + + std::optional> t = std::vector {}; // expected-warning {{object backing the pointer will be destroyed}} +} + +} // namespace GH100526 diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 1ca1d583d5ccd..3fa79636de5d7 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -554,7 +554,7 @@ bool Options::processFrontendOptions(InputArgList &Args) { bool Options::addFilePaths(InputArgList &Args, PathSeq &Headers, OptSpecifier ID) { for (const StringRef Path : Args.getAllArgValues(ID)) { - if ((bool)FM->getDirectory(Path, /*CacheFailure=*/false)) { + if ((bool)FM->getOptionalDirectoryRef(Path, /*CacheFailure=*/false)) { auto InputHeadersOrErr = enumerateFiles(*FM, Path); if (!InputHeadersOrErr) { Diags->Report(diag::err_cannot_open_file) diff --git a/clang/tools/clang-refactor/ClangRefactor.cpp b/clang/tools/clang-refactor/ClangRefactor.cpp index 9310263c446ae..968f0594085d4 100644 --- a/clang/tools/clang-refactor/ClangRefactor.cpp +++ b/clang/tools/clang-refactor/ClangRefactor.cpp @@ -117,7 +117,7 @@ class SourceRangeSelectionArgument final : public SourceSelectionArgument { bool forAllRanges(const SourceManager &SM, llvm::function_ref Callback) override { - auto FE = SM.getFileManager().getFile(Range.FileName); + auto FE = SM.getFileManager().getOptionalFileRef(Range.FileName); FileID FID = FE ?
SM.translateFile(*FE) : FileID(); if (!FE || FID.isInvalid()) { llvm::errs() << "error: -selection=" << Range.FileName diff --git a/clang/tools/clang-refactor/TestSupport.cpp b/clang/tools/clang-refactor/TestSupport.cpp index 3fae18c2109a6..8b6e250b3632d 100644 --- a/clang/tools/clang-refactor/TestSupport.cpp +++ b/clang/tools/clang-refactor/TestSupport.cpp @@ -43,7 +43,7 @@ void TestSelectionRangesInFile::dump(raw_ostream &OS) const { bool TestSelectionRangesInFile::foreachRange( const SourceManager &SM, llvm::function_ref Callback) const { - auto FE = SM.getFileManager().getFile(Filename); + auto FE = SM.getFileManager().getOptionalFileRef(Filename); FileID FID = FE ? SM.translateFile(*FE) : FileID(); if (!FE || FID.isInvalid()) { llvm::errs() << "error: -selection=test:" << Filename diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp index d32036d975ce9..88d778fccd68e 100644 --- a/clang/unittests/Basic/FileManagerTest.cpp +++ b/clang/unittests/Basic/FileManagerTest.cpp @@ -116,9 +116,9 @@ TEST_F(FileManagerTest, NoVirtualDirectoryExistsBeforeAVirtualFileIsAdded) { // by what's in the real file system. manager.setStatCache(std::make_unique()); - ASSERT_FALSE(manager.getDirectory("virtual/dir/foo")); - ASSERT_FALSE(manager.getDirectory("virtual/dir")); - ASSERT_FALSE(manager.getDirectory("virtual")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual/dir/foo")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual/dir")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual")); } // When a virtual file is added, all of its ancestors should be created. @@ -126,10 +126,12 @@ TEST_F(FileManagerTest, getVirtualFileCreatesDirectoryEntriesForAncestors) { // Fake an empty real file system. manager.setStatCache(std::make_unique()); - manager.getVirtualFile("virtual/dir/bar.h", 100, 0); - ASSERT_FALSE(manager.getDirectory("virtual/dir/foo")); + manager.getVirtualFileRef("virtual/dir/bar.h", 100, 0); - auto dir = manager.getDirectoryRef("virtual/dir"); + auto dir = manager.getDirectoryRef("virtual/dir/foo"); + ASSERT_THAT_EXPECTED(dir, llvm::Failed()); + + dir = manager.getDirectoryRef("virtual/dir"); ASSERT_THAT_EXPECTED(dir, llvm::Succeeded()); EXPECT_EQ("virtual/dir", dir->getName()); @@ -172,7 +174,7 @@ TEST_F(FileManagerTest, getFileReturnsValidFileEntryForExistingVirtualFile) { // Fake an empty real file system. manager.setStatCache(std::make_unique()); - manager.getVirtualFile("virtual/dir/bar.h", 100, 0); + manager.getVirtualFileRef("virtual/dir/bar.h", 100, 0); auto file = manager.getFileRef("virtual/dir/bar.h"); ASSERT_THAT_EXPECTED(file, llvm::Succeeded()); EXPECT_EQ("virtual/dir/bar.h", file->getName()); @@ -190,11 +192,11 @@ TEST_F(FileManagerTest, getFileReturnsDifferentFileEntriesForDifferentFiles) { statCache->InjectFile("bar.cpp", 43); manager.setStatCache(std::move(statCache)); - auto fileFoo = manager.getFile("foo.cpp"); - auto fileBar = manager.getFile("bar.cpp"); + auto fileFoo = manager.getOptionalFileRef("foo.cpp"); + auto fileBar = manager.getOptionalFileRef("bar.cpp"); ASSERT_TRUE(fileFoo); ASSERT_TRUE(fileBar); - EXPECT_NE(*fileFoo, *fileBar); + EXPECT_NE(&fileFoo->getFileEntry(), &fileBar->getFileEntry()); } // getFile() returns an error if neither a real file nor a virtual file @@ -208,19 +210,22 @@ TEST_F(FileManagerTest, getFileReturnsErrorForNonexistentFile) { manager.setStatCache(std::move(statCache)); // Create a virtual bar.cpp file. 
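+ // Note: the FileEntryRef-based APIs report failure as llvm::Expected, so the checks below convert the returned Error to a std::error_code before comparing.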
- manager.getVirtualFile("bar.cpp", 200, 0); + manager.getVirtualFileRef("bar.cpp", 200, 0); - auto file = manager.getFile("xyz.txt"); + auto file = manager.getFileRef("xyz.txt"); ASSERT_FALSE(file); - ASSERT_EQ(file.getError(), std::errc::no_such_file_or_directory); + ASSERT_EQ(llvm::errorToErrorCode(file.takeError()), + std::make_error_code(std::errc::no_such_file_or_directory)); - auto readingDirAsFile = manager.getFile("MyDirectory"); + auto readingDirAsFile = manager.getFileRef("MyDirectory"); ASSERT_FALSE(readingDirAsFile); - ASSERT_EQ(readingDirAsFile.getError(), std::errc::is_a_directory); + ASSERT_EQ(llvm::errorToErrorCode(readingDirAsFile.takeError()), + std::make_error_code(std::errc::is_a_directory)); - auto readingFileAsDir = manager.getDirectory("foo.cpp"); + auto readingFileAsDir = manager.getDirectoryRef("foo.cpp"); ASSERT_FALSE(readingFileAsDir); - ASSERT_EQ(readingFileAsDir.getError(), std::errc::not_a_directory); + ASSERT_EQ(llvm::errorToErrorCode(readingFileAsDir.takeError()), + std::make_error_code(std::errc::not_a_directory)); } // The following tests apply to Unix-like system only. @@ -236,11 +241,11 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) { statCache->InjectFile("abc/bar.cpp", 42); manager.setStatCache(std::move(statCache)); - auto f1 = manager.getFile("abc/foo.cpp"); - auto f2 = manager.getFile("abc/bar.cpp"); + auto f1 = manager.getOptionalFileRef("abc/foo.cpp"); + auto f2 = manager.getOptionalFileRef("abc/bar.cpp"); - EXPECT_EQ(f1 ? *f1 : nullptr, - f2 ? *f2 : nullptr); + EXPECT_EQ(f1 ? &f1->getFileEntry() : nullptr, + f2 ? &f2->getFileEntry() : nullptr); // Check that getFileRef also does the right thing. auto r1 = manager.getFileRef("abc/foo.cpp"); @@ -250,8 +255,8 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) { EXPECT_EQ("abc/foo.cpp", r1->getName()); EXPECT_EQ("abc/bar.cpp", r2->getName()); - EXPECT_EQ((f1 ? *f1 : nullptr), &r1->getFileEntry()); - EXPECT_EQ((f2 ? *f2 : nullptr), &r2->getFileEntry()); + EXPECT_EQ((f1 ? &f1->getFileEntry() : nullptr), &r1->getFileEntry()); + EXPECT_EQ((f2 ? &f2->getFileEntry() : nullptr), &r2->getFileEntry()); } TEST_F(FileManagerTest, getFileRefReturnsCorrectNameForDifferentStatPath) { @@ -338,11 +343,11 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedVirtualFiles) { statCache->InjectFile("abc/bar.cpp", 42); manager.setStatCache(std::move(statCache)); - auto f1 = manager.getFile("abc/foo.cpp"); - auto f2 = manager.getFile("abc/bar.cpp"); + auto f1 = manager.getOptionalFileRef("abc/foo.cpp"); + auto f2 = manager.getOptionalFileRef("abc/bar.cpp"); - EXPECT_EQ(f1 ? *f1 : nullptr, - f2 ? *f2 : nullptr); + EXPECT_EQ(f1 ? &f1->getFileEntry() : nullptr, + f2 ? 
&f2->getFileEntry() : nullptr); } TEST_F(FileManagerTest, getFileRefEquality) { @@ -420,20 +425,19 @@ TEST_F(FileManagerTest, getVirtualFileWithDifferentName) { manager.setStatCache(std::move(statCache)); // Inject the virtual file: - const FileEntry *file1 = manager.getVirtualFile("c:\\tmp\\test", 123, 1); - ASSERT_TRUE(file1 != nullptr); - EXPECT_EQ(43U, file1->getUniqueID().getFile()); - EXPECT_EQ(123, file1->getSize()); + FileEntryRef file1 = manager.getVirtualFileRef("c:\\tmp\\test", 123, 1); + EXPECT_EQ(43U, file1.getUniqueID().getFile()); + EXPECT_EQ(123, file1.getSize()); // Lookup the virtual file with a different name: - auto file2 = manager.getFile("c:/tmp/test", 100, 1); + auto file2 = manager.getOptionalFileRef("c:/tmp/test", 100, 1); ASSERT_TRUE(file2); // Check that it's the same UFE: EXPECT_EQ(file1, *file2); - EXPECT_EQ(43U, (*file2)->getUniqueID().getFile()); + EXPECT_EQ(43U, file2->getUniqueID().getFile()); // Check that the contents of the UFE are not overwritten by the entry in the // filesystem: - EXPECT_EQ(123, (*file2)->getSize()); + EXPECT_EQ(123, file2->getSize()); } #endif // !_WIN32 @@ -487,12 +491,11 @@ TEST_F(FileManagerTest, getVirtualFileFillsRealPathName) { Manager.setStatCache(std::move(statCache)); // Check for real path. - const FileEntry *file = Manager.getVirtualFile("/tmp/test", 123, 1); - ASSERT_TRUE(file != nullptr); + FileEntryRef file = Manager.getVirtualFileRef("/tmp/test", 123, 1); SmallString<64> ExpectedResult = CustomWorkingDir; llvm::sys::path::append(ExpectedResult, "tmp", "test"); - EXPECT_EQ(file->tryGetRealPathName(), ExpectedResult); + EXPECT_EQ(file.getFileEntry().tryGetRealPathName(), ExpectedResult); } TEST_F(FileManagerTest, getFileDontOpenRealPath) { @@ -514,12 +517,12 @@ TEST_F(FileManagerTest, getFileDontOpenRealPath) { Manager.setStatCache(std::move(statCache)); // Check for real path. - auto file = Manager.getFile("/tmp/test", /*OpenFile=*/false); + auto file = Manager.getOptionalFileRef("/tmp/test", /*OpenFile=*/false); ASSERT_TRUE(file); SmallString<64> ExpectedResult = CustomWorkingDir; llvm::sys::path::append(ExpectedResult, "tmp", "test"); - EXPECT_EQ((*file)->tryGetRealPathName(), ExpectedResult); + EXPECT_EQ(file->getFileEntry().tryGetRealPathName(), ExpectedResult); } TEST_F(FileManagerTest, getBypassFile) { diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp index 0f2476bd8b061..2b3fce9128ba9 100644 --- a/clang/unittests/Basic/SourceManagerTest.cpp +++ b/clang/unittests/Basic/SourceManagerTest.cpp @@ -549,7 +549,7 @@ TEST_F(SourceManagerTest, getMacroArgExpandedLocation) { // These are different than normal includes since predefines buffer doesn't // have a valid insertion location. PP.setPredefines("#include \"/implicit-header.h\""); - FileMgr.getVirtualFile("/implicit-header.h", 0, 0); + FileMgr.getVirtualFileRef("/implicit-header.h", 0, 0); PP.Initialize(*Target); PP.EnterMainSourceFile(); diff --git a/clang/unittests/Frontend/CompilerInstanceTest.cpp b/clang/unittests/Frontend/CompilerInstanceTest.cpp index 8bc705dd21993..5cf548e913cc1 100644 --- a/clang/unittests/Frontend/CompilerInstanceTest.cpp +++ b/clang/unittests/Frontend/CompilerInstanceTest.cpp @@ -71,7 +71,7 @@ TEST(CompilerInstance, DefaultVFSOverlayFromInvocation) { // Check if the virtual file exists which means that our VFS is used by the // CompilerInstance. 
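+ // getOptionalFileRef returns an OptionalFileEntryRef, which still converts to bool for this existence check.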
- ASSERT_TRUE(Instance.getFileManager().getFile("vfs-virtual.file")); + ASSERT_TRUE(Instance.getFileManager().getOptionalFileRef("vfs-virtual.file")); } TEST(CompilerInstance, AllowDiagnosticLogWithUnownedDiagnosticConsumer) { diff --git a/cmake/Modules/CMakePolicy.cmake b/cmake/Modules/CMakePolicy.cmake index b6962668cb09a..665af01d43bd2 100644 --- a/cmake/Modules/CMakePolicy.cmake +++ b/cmake/Modules/CMakePolicy.cmake @@ -29,3 +29,9 @@ endif() if(POLICY CMP0144) cmake_policy(SET CMP0144 NEW) endif() + +# CMP0147: Visual Studio Generators build custom commands in parallel. +# New in CMake 3.27: https://cmake.org/cmake/help/latest/policy/CMP0147.html +if(POLICY CMP0147) + cmake_policy(SET CMP0147 NEW) +endif() diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h index 0289cfd10db66..fae58497a8f80 100644 --- a/compiler-rt/lib/builtins/fp_lib.h +++ b/compiler-rt/lib/builtins/fp_lib.h @@ -171,8 +171,11 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask); *lo = r0 + (r1 << 64); + // The addition above can overflow, in which case `*lo` will be less than + // `r0`. Carry any overflow into `hi`. + const bool carry = *lo < r0; *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 + - (sum5 << 32) + (sum6 << 64); + (sum5 << 32) + (sum6 << 64) + carry; } #undef Word_1 #undef Word_2 diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index e0210aa0ac365..37aecae7237ae 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -239,6 +239,10 @@ size_t PageSize() { } void SetThreadName(std::thread &thread, const std::string &name) { +#if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) || \ + defined(_GLIBCXX_GCC_GTHR_POSIX_H) + (void)pthread_setname_np(thread.native_handle(), name.c_str()); +#else typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR); HMODULE kbase = GetModuleHandleA("KernelBase.dll"); proc ThreadNameProc = reinterpret_cast( @@ -253,6 +257,7 @@ void SetThreadName(std::thread &thread, const std::string &name) { } } } +#endif } } // namespace fuzzer diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index 0fc3a3f8f4896..af34fb63cf53c 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -5,7 +5,9 @@ set(RTSAN_CXX_SOURCES rtsan_context.cpp rtsan_diagnostics.cpp rtsan_flags.cpp - rtsan_interceptors.cpp) + rtsan_interceptors.cpp + rtsan_stats.cpp + ) set(RTSAN_PREINIT_SOURCES rtsan_preinit.cpp) @@ -16,7 +18,9 @@ set(RTSAN_HEADERS rtsan_context.h rtsan_diagnostics.h rtsan_flags.h - rtsan_flags.inc) + rtsan_flags.inc + rtsan_stats.h + ) set(RTSAN_DEPS) @@ -25,9 +29,10 @@ set(RTSAN_CFLAGS ${COMPILER_RT_CXX_CFLAGS} -DSANITIZER_COMMON_NO_REDEFINE_BUILTINS) set(RTSAN_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS}) -set(RTSAN_LINK_LIBS +set(RTSAN_DYNAMIC_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} - ${COMPILER_RT_CXX_LINK_LIBS}) + ${SANITIZER_CXX_ABI_LIBRARIES} + ${SANITIZER_COMMON_LINK_LIBS}) append_rtti_flag(OFF RTSAN_CFLAGS) @@ -60,11 +65,11 @@ set(RTSAN_COMMON_RUNTIME_OBJECT_LIBS RTSanitizerCommonCoverage RTSanitizerCommonSymbolizer) -append_list_if(COMPILER_RT_HAS_LIBDL dl RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBRT rt RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBM m RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBLOG 
log RTSAN_LINK_LIBS) +append_list_if(COMPILER_RT_HAS_LIBDL dl RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBRT rt RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBM m RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBLOG log RTSAN_DYNAMIC_LIBS) add_compiler_rt_component(rtsan) @@ -79,7 +84,7 @@ if (APPLE) OBJECT_LIBS RTRtsan ${RTSAN_COMMON_RUNTIME_OBJECT_LIBS} LINK_FLAGS ${RTSAN_LINK_FLAGS} - LINK_LIBS ${RTSAN_LINK_LIBS} + LINK_LIBS ${RTSAN_DYNAMIC_LIBS} PARENT_TARGET rtsan) else() add_compiler_rt_runtime(clang_rt.rtsan diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index b288da64ffbe2..6fcff5e326a52 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -8,44 +8,92 @@ // //===----------------------------------------------------------------------===// -#include -#include -#include -#include -#include +#include "rtsan/rtsan.h" +#include "rtsan/rtsan_assertions.h" +#include "rtsan/rtsan_diagnostics.h" +#include "rtsan/rtsan_flags.h" +#include "rtsan/rtsan_interceptors.h" +#include "rtsan/rtsan_stats.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_mutex.h" +#include "sanitizer_common/sanitizer_stackdepot.h" #include "sanitizer_common/sanitizer_stacktrace.h" using namespace __rtsan; using namespace __sanitizer; +namespace { +enum class InitializationState : u8 { + Uninitialized, + Initializing, + Initialized, +}; +} // namespace + static StaticSpinMutex rtsan_inited_mutex; -static atomic_uint8_t rtsan_initialized = {0}; +static atomic_uint8_t rtsan_initialized = { + static_cast(InitializationState::Uninitialized)}; + +static void SetInitializationState(InitializationState state) { + atomic_store(&rtsan_initialized, static_cast(state), + memory_order_release); +} -static void SetInitialized() { - atomic_store(&rtsan_initialized, 1, memory_order_release); +static InitializationState GetInitializationState() { + return static_cast( + atomic_load(&rtsan_initialized, memory_order_acquire)); } -static auto PrintDiagnosticsAndDieAction(DiagnosticsInfo info) { +static auto OnViolationAction(DiagnosticsInfo info) { return [info]() { - __rtsan::PrintDiagnostics(info); - Die(); + IncrementTotalErrorCount(); + + BufferedStackTrace stack; + + // We use the fast_unwind_on_fatal flag here for precedent with other + // sanitizers; this action is not necessarily fatal if halt_on_error=false. + stack.Unwind(info.pc, info.bp, nullptr, + common_flags()->fast_unwind_on_fatal); + + // If in the future we interop with other sanitizers, we will + // need to make our own stack depot. + StackDepotHandle handle = StackDepotPut_WithHandle(stack); + + const bool is_stack_novel = handle.use_count() == 0; + + // Marked UNLIKELY since, if the user is running with halt_on_error=false, + // we expect a high number of duplicate stacks. We are willing + // to pay for the first insertion.
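+ // A use_count of zero means the depot entry for this stack was just created, i.e. this exact stack has not been reported before; inc_use_count_unsafe below marks it as seen.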
+ if (UNLIKELY(is_stack_novel)) { + IncrementUniqueErrorCount(); + + PrintDiagnostics(info); + stack.Print(); + + handle.inc_use_count_unsafe(); + } + + if (flags().halt_on_error) + Die(); }; } extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_init() { - CHECK(!__rtsan_is_initialized()); + CHECK(GetInitializationState() == InitializationState::Uninitialized); + SetInitializationState(InitializationState::Initializing); SanitizerToolName = "RealtimeSanitizer"; InitializeFlags(); InitializeInterceptors(); - SetInitialized(); + if (flags().print_stats_on_exit) + Atexit(PrintStatisticsSummary); + + SetInitializationState(InitializationState::Initialized); } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_ensure_initialized() { @@ -62,7 +110,7 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_ensure_initialized() { } SANITIZER_INTERFACE_ATTRIBUTE bool __rtsan_is_initialized() { - return atomic_load(&rtsan_initialized, memory_order_acquire) == 1; + return GetInitializationState() == InitializationState::Initialized; } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_enter() { @@ -83,22 +131,24 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_enable() { SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_notify_intercepted_call(const char *func_name) { + // While initializing, we need all intercepted functions to behave normally + if (GetInitializationState() == InitializationState::Initializing) + return; + __rtsan_ensure_initialized(); GET_CALLER_PC_BP; - ExpectNotRealtime( - GetContextForThisThread(), - PrintDiagnosticsAndDieAction( - {DiagnosticsInfoType::InterceptedCall, func_name, pc, bp})); + ExpectNotRealtime(GetContextForThisThread(), + OnViolationAction({DiagnosticsInfoType::InterceptedCall, + func_name, pc, bp})); } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_notify_blocking_call(const char *func_name) { __rtsan_ensure_initialized(); GET_CALLER_PC_BP; - ExpectNotRealtime( - GetContextForThisThread(), - PrintDiagnosticsAndDieAction( - {DiagnosticsInfoType::BlockingCall, func_name, pc, bp})); + ExpectNotRealtime(GetContextForThisThread(), + OnViolationAction({DiagnosticsInfoType::BlockingCall, + func_name, pc, bp})); } } // extern "C" diff --git a/compiler-rt/lib/rtsan/rtsan_context.cpp b/compiler-rt/lib/rtsan/rtsan_context.cpp index 37ac817db76e4..1cf1791f0aaf8 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.cpp +++ b/compiler-rt/lib/rtsan/rtsan_context.cpp @@ -8,10 +8,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include "rtsan/rtsan_context.h" +#include "rtsan/rtsan.h" -#include +#include "sanitizer_common/sanitizer_allocator_internal.h" #include #include diff --git a/compiler-rt/lib/rtsan/rtsan_context.h b/compiler-rt/lib/rtsan/rtsan_context.h index 8512017793a48..cb0c2eb0a5e0d 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.h +++ b/compiler-rt/lib/rtsan/rtsan_context.h @@ -10,8 +10,6 @@ #pragma once -#include - namespace __rtsan { class Context { diff --git a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp index f82001f5b2057..cfe71481d3dc7 100644 --- a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp +++ b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp @@ -39,13 +39,6 @@ class Decorator : public __sanitizer::SanitizerCommonDecorator { }; } // namespace -static void PrintStackTrace(uptr pc, uptr bp) { - BufferedStackTrace stack{}; - - stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal); - stack.Print(); -} - static void PrintError(const Decorator &decorator, const DiagnosticsInfo 
&info) { const auto ErrorTypeStr = [&info]() -> const char * { @@ -91,5 +84,4 @@ void __rtsan::PrintDiagnostics(const DiagnosticsInfo &info) { PrintError(d, info); PrintReason(d, info); Printf("%s", d.Default()); - PrintStackTrace(info.pc, info.bp); } diff --git a/compiler-rt/lib/rtsan/rtsan_flags.inc b/compiler-rt/lib/rtsan/rtsan_flags.inc index 93b0294313672..1df71127d19d3 100644 --- a/compiler-rt/lib/rtsan/rtsan_flags.inc +++ b/compiler-rt/lib/rtsan/rtsan_flags.inc @@ -16,5 +16,5 @@ // RTSAN_FLAG(Type, Name, DefaultValue, Description) // See COMMON_FLAG in sanitizer_flags.inc for more details. -// Example flag, until we get a real one -// RTSAN_FLAG(bool, halt_on_error, true, "If true, halt the program on error") +RTSAN_FLAG(bool, halt_on_error, true, "Exit after first reported error.") +RTSAN_FLAG(bool, print_stats_on_exit, false, "Print stats on exit.") diff --git a/compiler-rt/lib/rtsan/rtsan_preinit.cpp b/compiler-rt/lib/rtsan/rtsan_preinit.cpp index 1307268951fbc..5d49223bc8beb 100644 --- a/compiler-rt/lib/rtsan/rtsan_preinit.cpp +++ b/compiler-rt/lib/rtsan/rtsan_preinit.cpp @@ -8,8 +8,8 @@ // //===----------------------------------------------------------------------===// +#include "rtsan/rtsan.h" #include "sanitizer_common/sanitizer_internal_defs.h" -#include #if SANITIZER_CAN_USE_PREINIT_ARRAY diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp new file mode 100644 index 0000000000000..dac7b23c3ef52 --- /dev/null +++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp @@ -0,0 +1,45 @@ +//===--- rtsan_stats.cpp - Realtime Sanitizer -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Part of the RealtimeSanitizer runtime library +// +//===----------------------------------------------------------------------===// + +#include "rtsan/rtsan_stats.h" + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" + +using namespace __sanitizer; +using namespace __rtsan; + +static atomic_uint32_t rtsan_total_error_count{0}; +static atomic_uint32_t rtsan_unique_error_count{0}; + +void __rtsan::IncrementTotalErrorCount() { + atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed); +} + +void __rtsan::IncrementUniqueErrorCount() { + atomic_fetch_add(&rtsan_unique_error_count, 1, memory_order_relaxed); +} + +static u32 GetTotalErrorCount() { + return atomic_load(&rtsan_total_error_count, memory_order_relaxed); +} + +static u32 GetUniqueErrorCount() { + return atomic_load(&rtsan_unique_error_count, memory_order_relaxed); +} + +void __rtsan::PrintStatisticsSummary() { + ScopedErrorReportLock l; + Printf("RealtimeSanitizer exit stats:\n"); + Printf(" Total error count: %u\n", GetTotalErrorCount()); + Printf(" Unique error count: %u\n", GetUniqueErrorCount()); +} diff --git a/compiler-rt/lib/rtsan/rtsan_stats.h b/compiler-rt/lib/rtsan/rtsan_stats.h new file mode 100644 index 0000000000000..a72098792c89c --- /dev/null +++ b/compiler-rt/lib/rtsan/rtsan_stats.h @@ -0,0 +1,22 @@ +//===--- rtsan_stats.h - Realtime Sanitizer ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Part of the RealtimeSanitizer runtime library +// +//===----------------------------------------------------------------------===// + +#pragma once + +namespace __rtsan { + +void IncrementTotalErrorCount(); +void IncrementUniqueErrorCount(); + +void PrintStatisticsSummary(); + +} // namespace __rtsan diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp index dff3c527350fd..9e455f0326a54 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp @@ -15,9 +15,10 @@ #include "gtest/gtest.h" #include "rtsan_test_utilities.h" -#include -#include -#include + +#include "rtsan/rtsan.h" +#include "sanitizer_common/sanitizer_platform.h" +#include "sanitizer_common/sanitizer_platform_interceptors.h" #include #include diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index e96d3758bcaf8..c65b1bb01fbe0 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -10,8 +10,8 @@ #include "gtest/gtest.h" -#include -#include +#include "sanitizer_common/sanitizer_platform.h" +#include "sanitizer_common/sanitizer_platform_interceptors.h" #include "rtsan_test_utilities.h" diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index e29cc0c8b390a..7898af4a335e3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -1289,9 +1289,11 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3, static const int PR_SCHED_CORE = 62; static const int PR_SCHED_CORE_GET = 0; static const int PR_GET_PDEATHSIG = 2; - static const int PR_SET_SECCOMP = 22; +# if !SANITIZER_ANDROID + static const int PR_SET_SECCOMP = 22; static const int SECCOMP_MODE_FILTER = 2; +# endif if (option == PR_SET_VMA && arg2 == 0UL) { char *name = (char *)arg5; COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1); @@ -1310,9 +1312,11 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3, COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg5), sizeof(u64)); } else if (res != -1 && option == PR_GET_PDEATHSIG) { COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg2), sizeof(int)); +# if !SANITIZER_ANDROID } else if (res != -1 && option == PR_SET_SECCOMP && arg2 == SECCOMP_MODE_FILTER) { COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg3), struct_sock_fprog_sz); +# endif } return res; } @@ -10338,6 +10342,24 @@ INTERCEPTOR(SSIZE_T, pwritev2, int fd, __sanitizer_iovec *iov, int iovcnt, #define INIT_PWRITEV2 #endif +#if SANITIZER_INTERCEPT_FREADLINK +INTERCEPTOR(SSIZE_T, freadlink, int fd, char *buf, SIZE_T bufsiz) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, freadlink, fd, buf, bufsiz); + COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd); + SSIZE_T res = REAL(freadlink)(fd, buf, bufsiz); + if (res > 0) + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res); + if (res >= 0 && fd > 0) + COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd); + return res; +} + +# define INIT_FREADLINK COMMON_INTERCEPT_FUNCTION(freadlink) +#else +# define INIT_FREADLINK +#endif + #include 
"sanitizer_common_interceptors_netbsd_compat.inc" namespace __sanitizer { @@ -10659,6 +10681,7 @@ static void InitializeCommonInterceptors() { INIT_CPUSET_GETAFFINITY; INIT_PREADV2; INIT_PWRITEV2; + INIT_FREADLINK; INIT___PRINTF_CHK; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index e71a6bcd6a837..d4cc380f641b8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -606,6 +606,13 @@ // FIXME: also available from musl 1.2.5 #define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) #define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130000 +# define SI_MAC_DEPLOYMENT_BELOW_13_00 1 +#else +# define SI_MAC_DEPLOYMENT_BELOW_13_00 0 +#endif +#define SANITIZER_INTERCEPT_FREADLINK (SI_MAC && !SI_MAC_DEPLOYMENT_BELOW_13_00) // This macro gives a way for downstream users to override the above // interceptor macros irrespective of the platform they are on. They have diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 27d01653f088d..a419cf0b2b555 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -633,13 +633,16 @@ static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts) { ScopedReport R(Opts, Loc, ET); - Diag(Loc, DL_Error, ET, - "passing zero to %0, which is not a valid argument") - << ((Data->Kind == BCK_CTZPassedZero) ? "ctz()" : "clz()"); + if (Data->Kind == BCK_AssumePassedFalse) + Diag(Loc, DL_Error, ET, "assumption is violated during execution"); + else + Diag(Loc, DL_Error, ET, + "passing zero to __builtin_%0(), which is not a valid argument") + << ((Data->Kind == BCK_CTZPassedZero) ? 
"ctz" : "clz"); } void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data) { - GET_REPORT_OPTIONS(true); + GET_REPORT_OPTIONS(false); handleInvalidBuiltin(Data, Opts); } void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) { diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index bae661a56833d..4ffa1439a1323 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -159,6 +159,7 @@ RECOVERABLE(implicit_conversion, ImplicitConversionData *Data, ValueHandle Src, enum BuiltinCheckKind : unsigned char { BCK_CTZPassedZero, BCK_CLZPassedZero, + BCK_AssumePassedFalse, }; struct InvalidBuiltinData { diff --git a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp index e99665953784a..461702a0ea7a9 100644 --- a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp @@ -3,6 +3,9 @@ // RUN: %clang_asan -O0 -fsanitize-recover=address %s -o %t // RUN: %env_asan_opts=halt_on_error=false not %run %t 2>&1 | FileCheck %s +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + #include #include #include diff --git a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp index 8c9599c9f6110..f6c95318238af 100644 --- a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp @@ -2,6 +2,9 @@ // RUN: %clangxx_asan -O0 %s -o %t && %env_asan_opts=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + #include #include #include diff --git a/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp b/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp index 06057250f8759..3d95a4ba273db 100644 --- a/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp @@ -16,6 +16,9 @@ // RUN: not %run %t 2>&1 | FileCheck %s // REQUIRES: stable-runtime +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + // UNSUPPORTED: ios #include diff --git a/compiler-rt/test/asan/lit.cfg.py b/compiler-rt/test/asan/lit.cfg.py index 05ed7e8dd294e..dac3ef00a99af 100644 --- a/compiler-rt/test/asan/lit.cfg.py +++ b/compiler-rt/test/asan/lit.cfg.py @@ -153,12 +153,16 @@ def build_invocation(compile_flags, with_lto=False): if platform.system() == "Windows": # MSVC-specific tests might also use the clang-cl.exe driver. 
        if target_is_msvc:
-            clang_cl_cxxflags = [
-                "-Wno-deprecated-declarations",
-                "-WX",
-                "-D_HAS_EXCEPTIONS=0",
-                "-Zi",
-            ] + target_cflags
+            clang_cl_cxxflags = (
+                [
+                    "-WX",
+                    "-D_HAS_EXCEPTIONS=0",
+                ]
+                + config.debug_info_flags
+                + target_cflags
+            )
+            if config.compiler_id != "MSVC":
+                clang_cl_cxxflags = ["-Wno-deprecated-declarations"] + clang_cl_cxxflags
         clang_cl_asan_cxxflags = ["-fsanitize=address"] + clang_cl_cxxflags
         if config.asan_dynamic:
             clang_cl_asan_cxxflags.append("-MD")
@@ -286,6 +290,12 @@ def build_invocation(compile_flags, with_lto=False):
     [config.compiler_rt_libdir, os.environ.get("PATH", "")]
 )
 
+# msvc needs to be instructed where the compiler-rt libraries are
+if config.compiler_id == "MSVC":
+    config.environment["LIB"] = os.path.pathsep.join(
+        [config.compiler_rt_libdir, config.environment.get("LIB", "")]
+    )
+
 # Default test suffixes.
 config.suffixes = [".c", ".cpp"]
diff --git a/compiler-rt/test/builtins/Unit/multf3_test.c b/compiler-rt/test/builtins/Unit/multf3_test.c
index 543b55899ce82..0e561551d3534 100644
--- a/compiler-rt/test/builtins/Unit/multf3_test.c
+++ b/compiler-rt/test/builtins/Unit/multf3_test.c
@@ -77,6 +77,12 @@ int main()
                     UINT64_C(0x0), UINT64_C(0x0)))
         return 1;
 
+    // test carry between lo and hi in widening multiply
+    if (test__multf3(0x0.7fffffffffffffffffffffffffffp-16382L,
+                     0x1.7fffffffffffffffffffffffffffp+1L,
+                     UINT64_C(0x00017fffffffffff),
+                     UINT64_C(0xfffffffffffffffc)))
+        return 1;
 #else
     printf("skipped\n");
diff --git a/compiler-rt/test/rtsan/deduplicate_errors.cpp b/compiler-rt/test/rtsan/deduplicate_errors.cpp
new file mode 100644
index 0000000000000..7d60d4d7da7dd
--- /dev/null
+++ b/compiler-rt/test/rtsan/deduplicate_errors.cpp
@@ -0,0 +1,39 @@
+// RUN: %clangxx -fsanitize=realtime %s -o %t
+// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s
+
+// UNSUPPORTED: ios
+
+// Intent: Ensure all errors are deduplicated.
+
+#include <unistd.h>
+
+const int kNumViolations = 10;
+
+void violation() [[clang::nonblocking]] {
+  for (int i = 0; i < kNumViolations; i++)
+    usleep(1);
+}
+
+void violation2() [[clang::nonblocking]] {
+  for (int i = 0; i < kNumViolations; i++)
+    violation();
+}
+
+void double_violation() [[clang::nonblocking]] {
+  violation();
+  violation2();
+}
+
+int main() {
+  violation();        // 1 unique error here, but 10 total
+  violation2();       // 1 unique error here, but 100 total
+  double_violation(); // 2 unique errors here, but 110 total
+  return 0;
+}
+
+// CHECK-COUNT-4: ==ERROR:
+// CHECK-NOT: ==ERROR:
+
+// CHECK: RealtimeSanitizer exit stats:
+// CHECK-NEXT: Total error count: 220
+// CHECK-NEXT: Unique error count: 4
diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp
new file mode 100644
index 0000000000000..4341fbb0f9cf2
--- /dev/null
+++ b/compiler-rt/test/rtsan/exit_stats.cpp
@@ -0,0 +1,24 @@
+// RUN: %clangxx -fsanitize=realtime %s -o %t
+// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s
+
+// UNSUPPORTED: ios
+
+// Intent: Ensure exit stats are printed on exit.
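The two rtsan stats tests here check one counting model: every violation bumps the total counter, while the unique counter only advances the first time a given report is seen. A minimal sketch of that model, assuming deduplication is keyed on a hash of the violation's call stack (consistent with the four unique errors the test above expects); all names here are illustrative, not the in-tree implementation, whose counters sit behind the IncrementTotalErrorCount()/IncrementUniqueErrorCount() entry points declared in rtsan_stats.h:

```cpp
#include <atomic>
#include <cstdio>
#include <mutex>
#include <unordered_set>

// Hypothetical model of RTSan error accounting -- not the in-tree code.
// Assumes each report is keyed by a hash of its stack trace.
namespace rtsan_model {

std::atomic<unsigned> total_errors{0};
std::atomic<unsigned> unique_errors{0};
std::mutex seen_mutex;
std::unordered_set<unsigned long> seen_stacks;

void ReportViolation(unsigned long stack_hash) {
  total_errors.fetch_add(1, std::memory_order_relaxed); // counted every time
  std::lock_guard<std::mutex> lock(seen_mutex);
  if (seen_stacks.insert(stack_hash).second) // first sighting of this stack
    unique_errors.fetch_add(1, std::memory_order_relaxed);
}

void PrintStatisticsSummary() {
  std::printf("RealtimeSanitizer exit stats:\n");
  std::printf("    Total error count: %u\n", total_errors.load());
  std::printf("    Unique error count: %u\n", unique_errors.load());
}

} // namespace rtsan_model
```

Under this model deduplicate_errors.cpp sees four distinct stacks -- violation() called directly, violation() via violation2(), and both paths again underneath double_violation() -- which is why it expects 4 unique errors against 220 total.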
+
+#include <unistd.h>
+
+void violation() [[clang::nonblocking]] {
+  const int kNumViolations = 10;
+  for (int i = 0; i < kNumViolations; i++) {
+    usleep(1);
+  }
+}
+
+int main() {
+  violation();
+  return 0;
+}
+
+// CHECK: RealtimeSanitizer exit stats:
+// CHECK-NEXT: Total error count: 10
+// CHECK-NEXT: Unique error count: 1
diff --git a/compiler-rt/test/rtsan/halt_on_error.cpp b/compiler-rt/test/rtsan/halt_on_error.cpp
new file mode 100644
index 0000000000000..c2ebdf349f371
--- /dev/null
+++ b/compiler-rt/test/rtsan/halt_on_error.cpp
@@ -0,0 +1,26 @@
+// RUN: %clangxx -fsanitize=realtime %s -o %t
+// RUN: %env_rtsan_opts="halt_on_error=true" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_rtsan_opts="halt_on_error=false" %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-HALT,CHECK
+// UNSUPPORTED: ios
+
+// Intent: Ensure that halt_on_error does not exit on the first violation.
+
+#include <stdlib.h>
+
+void *MallocViolation() { return malloc(10); }
+
+void FreeViolation(void *Ptr) { free(Ptr); }
+
+void process() [[clang::nonblocking]] {
+  void *Ptr = MallocViolation();
+  FreeViolation(Ptr);
+}
+
+int main() {
+  process();
+  return 0;
+  // CHECK: ==ERROR: RealtimeSanitizer
+  // CHECK-NEXT: {{.*`malloc`.*}}
+  // CHECK-NO-HALT: ==ERROR: RealtimeSanitizer
+  // CHECK-NO-HALT-NEXT: {{.*`free`.*}}
+}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c b/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c
new file mode 100644
index 0000000000000..53658cdb66aa3
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c
@@ -0,0 +1,29 @@
+// RUN: %clang -O0 %s -o %t && %run %t
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char **argv) {
+  char symlink_path[PATH_MAX];
+  snprintf(symlink_path, sizeof(symlink_path), "%s_%d.symlink", argv[0],
+           getpid());
+  remove(symlink_path);
+  int res = symlink(argv[0], symlink_path);
+  assert(!res);
+
+  int fd;
+  char readlink_path[PATH_MAX];
+  fd = open(symlink_path, O_RDONLY);
+  ssize_t res2 = freadlink(fd, readlink_path, sizeof(readlink_path));
+  assert(res2 >= 0);
+  readlink_path[res2] = '\0';
+  assert(!strcmp(readlink_path, argv[0]));
+  close(fd);
+
+  return 0;
+}
diff --git a/compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp b/compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp
new file mode 100644
index 0000000000000..60377c492e8cc
--- /dev/null
+++ b/compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp
@@ -0,0 +1,18 @@
+// RUN: %clangxx -fsanitize=builtin -g0 %s -o %t
+
+// Suppression by symbol name requires the compiler-rt runtime to be able to
+// symbolize stack addresses.
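A detail worth noting about the freadlink test above: like readlink, freadlink returns the number of bytes it wrote and does not NUL-terminate the buffer, which is why the test stores '\0' at index res2 itself and why the new interceptor unpoisons exactly res bytes, not res + 1. A hedged C++ sketch of the same contract using POSIX readlink (freadlink is the Darwin-only, fd-relative variant, so readlink stands in here):

```cpp
#include <string>
#include <unistd.h>

// readlink()/freadlink() report how many bytes they wrote and never append
// '\0', so the result must be length-bounded explicitly.
std::string ReadLinkTarget(const char *path) {
  char buf[4096];
  ssize_t n = readlink(path, buf, sizeof(buf));
  if (n < 0)
    return {}; // failed; errno has the reason
  // Construct from (data, length) rather than relying on a terminator that
  // was never written.
  return std::string(buf, static_cast<size_t>(n));
}
```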
+// REQUIRES: can-symbolize +// UNSUPPORTED: android + +// RUN: echo "invalid-builtin-use:do_ctz" > %t.func-supp +// RUN: %env_ubsan_opts=halt_on_error=1:suppressions='"%t.func-supp"' %run %t + +#include + +extern "C" void do_ctz(int n) { __builtin_ctz(0); } + +int main() { + do_ctz(0); + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp index f8f564cb7baae..2702065bce067 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp @@ -1,35 +1,47 @@ // REQUIRES: target={{x86_64.*}} // -// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t +// RUN: %clangxx -fsanitize=builtin -fno-inline -w %s -O3 -o %t // RUN: %run %t 2>&1 | FileCheck %s --check-prefix=RECOVER -// RUN: %clangxx -fsanitize=builtin -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort +// RUN: %clangxx -fsanitize=builtin -fno-inline -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort // RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT void check_ctz(int n) { - // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to ctz(), which is not a valid argument - // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to ctz(), which is not a valid argument + // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to __builtin_ctz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to __builtin_ctz(), which is not a valid argument __builtin_ctz(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to ctz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to __builtin_ctz(), which is not a valid argument __builtin_ctzl(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to ctz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to __builtin_ctz(), which is not a valid argument __builtin_ctzll(n); } void check_clz(int n) { - // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to clz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to __builtin_clz(), which is not a valid argument __builtin_clz(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to clz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to __builtin_clz(), which is not a valid argument __builtin_clzl(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to __builtin_clz(), which is not a valid argument __builtin_clzll(n); } +void check_assume(int n) { + // RECOVER: builtins.cpp:[[@LINE+1]]:20: runtime error: assumption is violated during execution + __builtin_assume(n); +} + +void check_assume_attr(int n) { + // RECOVER: builtins.cpp:[[@LINE+1]]:25: runtime error: assumption is violated during execution + __attribute__((assume(n))); +} + int main() { check_ctz(0); check_clz(0); + check_assume(0); + check_assume_attr(0); return 0; } diff --git a/flang/include/flang/Lower/CallInterface.h b/flang/include/flang/Lower/CallInterface.h index 9a688330e8bd2..1fb390455733f 100644 --- a/flang/include/flang/Lower/CallInterface.h +++ b/flang/include/flang/Lower/CallInterface.h @@ -42,6 +42,10 @@ namespace mlir { class Location; 
} +namespace fir { +class FortranProcedureFlagsEnumAttr; +} + namespace Fortran::lower { class AbstractConverter; class SymMap; @@ -235,6 +239,11 @@ class CallInterface { return characteristic && characteristic->CanBeCalledViaImplicitInterface(); } + /// Translate Fortran procedure attributes into FIR attribute. + /// Return attribute is nullptr if the procedure has no attributes. + fir::FortranProcedureFlagsEnumAttr + getProcedureAttrs(mlir::MLIRContext *) const; + protected: CallInterface(Fortran::lower::AbstractConverter &c) : converter{c} {} /// CRTP handle. diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 6400756b38448..4e84959a3b3e1 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -62,6 +62,8 @@ def fir_FortranVariableFlagsAttr : fir_Attr<"FortranVariableFlags"> { /// Fortran procedure attributes (F2023 15.6.2.1). BIND attribute (18.3.7) /// is also tracked in the same enum. Recursive (resp. Impure) attribute /// is implied by the absence of opposite NonRecursive (resp. Pure) attribute. +/// Beware that "elemental" does not implicitly imply "pure" as it does in +/// Fortran, "pure" must be made explicit when generating the FIR attribute. def FIRfuncNoAttributes : I32BitEnumAttrCaseNone<"none">; def FIRfuncElemental : I32BitEnumAttrCaseBit<"elemental", 0>; def FIRfuncPure : I32BitEnumAttrCaseBit<"pure", 1>; diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index fcfb8677951a2..3b2af3a339810 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -39,6 +39,7 @@ namespace fir { #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION #define GEN_PASS_DECL_CHARACTERCONVERSION #define GEN_PASS_DECL_CFGCONVERSION +#define GEN_PASS_DECL_CUFADDCONSTRUCTOR #define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL #define GEN_PASS_DECL_CUFOPCONVERSION #define GEN_PASS_DECL_EXTERNALNAMECONVERSION diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index ab98591c911cd..bf75123e85377 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -436,4 +436,11 @@ def CufImplicitDeviceGlobal : ]; } +def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> { + let summary = "Add constructor to register CUDA Fortran allocators"; + let dependentDialects = [ + "mlir::func::FuncDialect" + ]; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h index cb8681da161f0..b73bc390ea8c9 100644 --- a/flang/include/flang/Runtime/CUDA/common.h +++ b/flang/include/flang/Runtime/CUDA/common.h @@ -12,6 +12,13 @@ #include "flang/Runtime/descriptor.h" #include "flang/Runtime/entry-names.h" +/// Type of memory for allocation/deallocation +static constexpr unsigned kMemTypeDevice = 0; +static constexpr unsigned kMemTypeManaged = 1; +static constexpr unsigned kMemTypeUnified = 2; +static constexpr unsigned kMemTypePinned = 3; + +/// Data transfer kinds. 
 static constexpr unsigned kHostToDevice = 0;
 static constexpr unsigned kDeviceToHost = 1;
 static constexpr unsigned kDeviceToDevice = 2;
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 33947248dc483..3c3ae73d4ad7a 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda {
 
 extern "C" {
 
+/// Allocate memory on the device.
+void *RTDECL(CUFMemAlloc)(std::size_t bytes, unsigned type,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+
+/// Free memory allocated on the device.
+void RTDECL(CUFMemFree)(void *devicePtr, unsigned type,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+
 /// Set value to the data held by a descriptor. The \p value pointer must be
 /// addressable to the same amount of bytes specified by the element size of
 /// the descriptor \p desc.
 void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
+/// Data transfer from a pointer to a pointer.
+void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+    unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
+
 /// Data transfer from a pointer to a descriptor.
 void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
     std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 15c02ecc0058c..96d4dbb2acaa1 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -736,5 +736,34 @@ std::string GetCommonBlockObjectName(const Symbol &, bool underscoring);
 // Check for ambiguous USE associations
 bool HadUseError(SemanticsContext &, SourceName at, const Symbol *);
 
+/// Checks if the assignment statement has a single variable on the RHS.
+inline bool checkForSingleVariableOnRHS(
+    const Fortran::parser::AssignmentStmt &assignmentStmt) {
+  const Fortran::parser::Expr &expr{
+      std::get<Fortran::parser::Expr>(assignmentStmt.t)};
+  const Fortran::common::Indirection<Fortran::parser::Designator> *designator =
+      std::get_if<Fortran::common::Indirection<Fortran::parser::Designator>>(
+          &expr.u);
+  return designator != nullptr;
+}
+
+/// Checks if the symbol on the LHS of the assignment statement is present in
+/// the RHS expression.
+inline bool checkForSymbolMatch(
+    const Fortran::parser::AssignmentStmt &assignmentStmt) {
+  const auto &var{std::get<Fortran::parser::Variable>(assignmentStmt.t)};
+  const auto &expr{std::get<Fortran::parser::Expr>(assignmentStmt.t)};
+  const auto *e{Fortran::semantics::GetExpr(expr)};
+  const auto *v{Fortran::semantics::GetExpr(var)};
+  auto varSyms{Fortran::evaluate::GetSymbolVector(*v)};
+  const Fortran::semantics::Symbol &varSymbol{*varSyms.front()};
+  for (const Fortran::semantics::Symbol &symbol :
+       Fortran::evaluate::GetSymbolVector(*e)) {
+    if (varSymbol == symbol) {
+      return true;
+    }
+  }
+  return false;
+}
 } // namespace Fortran::semantics
 #endif // FORTRAN_SEMANTICS_TOOLS_H_
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index c0ef96adc20c3..f541f84738291 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -1546,6 +1546,29 @@ Fortran::lower::CallInterface::getResultType() const {
   return types;
 }
 
+template <typename T>
+fir::FortranProcedureFlagsEnumAttr
+Fortran::lower::CallInterface<T>::getProcedureAttrs(
+    mlir::MLIRContext *mlirContext) const {
+  if (characteristic) {
+    fir::FortranProcedureFlagsEnum flags = fir::FortranProcedureFlagsEnum::none;
+    if (characteristic->IsBindC())
+      flags = flags | fir::FortranProcedureFlagsEnum::bind_c;
+    if (characteristic->IsPure())
+      flags = flags | fir::FortranProcedureFlagsEnum::pure;
+    if (characteristic->IsElemental())
+      flags = flags | fir::FortranProcedureFlagsEnum::elemental;
+    // TODO:
+    // - SIMPLE: F2023, not yet handled by semantics.
+    // - NON_RECURSIVE: not part of the characteristics. Maybe this should
+    //   simply not be part of FortranProcedureFlagsEnum since it cannot
+    //   accurately be known on the caller side.
+    if (flags != fir::FortranProcedureFlagsEnum::none)
+      return fir::FortranProcedureFlagsEnumAttr::get(mlirContext, flags);
+  }
+  return nullptr;
+}
+
 template class Fortran::lower::CallInterface<Fortran::lower::CalleeInterface>;
 template class Fortran::lower::CallInterface<Fortran::lower::CallerInterface>;
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 017bfd049d3dc..ee5eb225f0d7e 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -631,13 +631,9 @@ std::pair Fortran::lower::genCallOpAndResult(
     if (callNumResults != 0)
       callResult = dispatch.getResult(0);
   } else {
-    // TODO: gather other procedure attributes.
-    fir::FortranProcedureFlagsEnumAttr procAttrs;
-    if (caller.characterize().IsBindC())
-      procAttrs = fir::FortranProcedureFlagsEnumAttr::get(
-          builder.getContext(), fir::FortranProcedureFlagsEnum::bind_c);
-
     // Standard procedure call with fir.call.
+    fir::FortranProcedureFlagsEnumAttr procAttrs =
+        caller.getProcedureAttrs(builder.getContext());
     auto call = builder.create<fir::CallOp>(
         loc, funcType.getResults(), funcSymbolAttr, operands, procAttrs);
diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h
index d2060e77ce530..a32f0b287e049 100644
--- a/flang/lib/Lower/DirectivesCommon.h
+++ b/flang/lib/Lower/DirectivesCommon.h
@@ -74,34 +74,6 @@ struct AddrAndBoundsInfo {
   }
 };
 
-/// Checks if the assignment statement has a single variable on the RHS.
-static inline bool checkForSingleVariableOnRHS(
-    const Fortran::parser::AssignmentStmt &assignmentStmt) {
-  const Fortran::parser::Expr &expr{
-      std::get(assignmentStmt.t)};
-  const Fortran::common::Indirection *designator =
-      std::get_if>(
-          &expr.u);
-  return designator != nullptr;
-}
-
-/// Checks if the symbol on the LHS of the assignment statement is present in
-// the RHS expression.
-static inline bool -checkForSymbolMatch(const Fortran::parser::AssignmentStmt &assignmentStmt) { - const auto &var{std::get(assignmentStmt.t)}; - const auto &expr{std::get(assignmentStmt.t)}; - const auto *e{Fortran::semantics::GetExpr(expr)}; - const auto *v{Fortran::semantics::GetExpr(var)}; - auto varSyms{Fortran::evaluate::GetSymbolVector(*v)}; - const Fortran::semantics::Symbol &varSymbol{*varSyms.front()}; - for (const Fortran::semantics::Symbol &symbol : - Fortran::evaluate::GetSymbolVector(*e)) - if (varSymbol == symbol) - return true; - return false; -} - /// Populates \p hint and \p memoryOrder with appropriate clause information /// if present on atomic construct. static inline void genOmpAtomicHintAndMemoryOrderClauses( @@ -537,7 +509,7 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, stmt2LHSArg = fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx)); // Operation specific RHS evaluations - if (checkForSingleVariableOnRHS(stmt1)) { + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] or // of the form [capture-stmt, write-stmt] stmt1RHSArg = fir::getBase(converter.genExprAddr(assign1.rhs, stmtCtx)); @@ -573,8 +545,8 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); mlir::Block &block = atomicCaptureOp->getRegion(0).back(); firOpBuilder.setInsertionPointToStart(&block); - if (checkForSingleVariableOnRHS(stmt1)) { - if (checkForSymbolMatch(stmt2)) { + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { + if (Fortran::semantics::checkForSymbolMatch(stmt2)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] const Fortran::semantics::SomeExpr &fromExpr = *Fortran::semantics::GetExpr(stmt1Expr); diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index b68e3d68b9b83..5e1a0293e63c9 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ add_flang_library(FIRTransforms CompilerGeneratedNames.cpp ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp + CUFAddConstructor.cpp CufImplicitDeviceGlobal.cpp CufOpConversion.cpp ArrayValueCopy.cpp diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp new file mode 100644 index 0000000000000..48620fbc58586 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp @@ -0,0 +1,75 @@ +//===-- CUFAddConstructor.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+static constexpr llvm::StringRef cudaFortranCtorName{
+    "__cudaFortranConstructor"};
+
+struct CUFAddConstructor
+    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {
+
+  void runOnOperation() override {
+    mlir::ModuleOp mod = getOperation();
+    mlir::OpBuilder builder{mod.getBodyRegion()};
+    builder.setInsertionPointToEnd(mod.getBody());
+    mlir::Location loc = mod.getLoc();
+    auto *ctx = mod.getContext();
+    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto funcTy =
+        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+
+    // Symbol reference to CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
+        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
+    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
+        mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
+    builder.setInsertionPointToEnd(mod.getBody());
+
+    // Create the constructor function that calls CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
+                                                       funcTy);
+    func.setLinkage(mlir::LLVM::Linkage::Internal);
+    builder.setInsertionPointToStart(func.addEntryBlock(builder));
+    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
+    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
+
+    // Create the llvm.global_ctors with the function.
+    // TODO: We might want to have a utility that retrieves it if already
+    // created and adds new functions.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::SmallVector<mlir::Attribute> funcs;
+    funcs.push_back(
+        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
+    llvm::SmallVector<int> priorities;
+    priorities.push_back(0);
+    builder.create<mlir::LLVM::GlobalCtorsOp>(
+        mod.getLoc(), builder.getArrayAttr(funcs),
+        builder.getI32ArrayAttr(priorities));
+  }
+};
+
+} // end anonymous namespace
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 643b713b32e29..dfc3f3290a81b 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -1977,6 +1977,58 @@ void OmpStructureChecker::CheckAtomicUpdateStmt(
   ErrIfAllocatableVariable(var);
 }
 
+// TODO: Allow cond-update-stmt once compare clause is supported.
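CheckAtomicCaptureConstruct below classifies the two statements of an atomic capture region into one of three accepted shapes. For orientation, the same three shapes written in C/C++ OpenMP syntax (illustrative only; the checker itself walks Fortran parse trees):

```cpp
// The three statement pairs accepted for "atomic capture":
int x = 0, v = 0;

void capture_then_update() {
#pragma omp atomic capture
  {
    v = x;     // capture-stmt
    x = x + 1; // update-stmt
  }
}

void capture_then_write() {
#pragma omp atomic capture
  {
    v = x;  // capture-stmt
    x = 42; // write-stmt
  }
}

void update_then_capture() {
#pragma omp atomic capture
  {
    x = x + 1; // update-stmt
    v = x;     // capture-stmt: observes the updated value
  }
}
```

Anything else, such as the `v = 1` / `x = 4` pair exercised in omp-atomic-assignment-stmt.f90 further down, falls through to the "Invalid ATOMIC CAPTURE construct statements" diagnostic.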
+void OmpStructureChecker::CheckAtomicCaptureConstruct(
+    const parser::OmpAtomicCapture &atomicCaptureConstruct) {
+  const Fortran::parser::AssignmentStmt &stmt1 =
+      std::get<Fortran::parser::OmpAtomicCapture::Stmt1>(
+          atomicCaptureConstruct.t)
+          .v.statement;
+  const auto &stmt1Var{std::get<Fortran::parser::Variable>(stmt1.t)};
+  const auto &stmt1Expr{std::get<Fortran::parser::Expr>(stmt1.t)};
+
+  const Fortran::parser::AssignmentStmt &stmt2 =
+      std::get<Fortran::parser::OmpAtomicCapture::Stmt2>(
+          atomicCaptureConstruct.t)
+          .v.statement;
+  const auto &stmt2Var{std::get<Fortran::parser::Variable>(stmt2.t)};
+  const auto &stmt2Expr{std::get<Fortran::parser::Expr>(stmt2.t)};
+
+  if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) {
+    CheckAtomicCaptureStmt(stmt1);
+    if (Fortran::semantics::checkForSymbolMatch(stmt2)) {
+      // ATOMIC CAPTURE construct is of the form [capture-stmt, update-stmt]
+      CheckAtomicUpdateStmt(stmt2);
+    } else {
+      // ATOMIC CAPTURE construct is of the form [capture-stmt, write-stmt]
+      CheckAtomicWriteStmt(stmt2);
+    }
+    auto *v{stmt2Var.typedExpr.get()};
+    auto *e{stmt1Expr.typedExpr.get()};
+    if (v && e && !(v->v == e->v)) {
+      context_.Say(stmt1Expr.source,
+          "Captured variable/array element/derived-type component %s expected to be assigned in the second statement of ATOMIC CAPTURE construct"_err_en_US,
+          stmt1Expr.source);
+    }
+  } else if (Fortran::semantics::checkForSymbolMatch(stmt1) &&
+      Fortran::semantics::checkForSingleVariableOnRHS(stmt2)) {
+    // ATOMIC CAPTURE construct is of the form [update-stmt, capture-stmt]
+    CheckAtomicUpdateStmt(stmt1);
+    CheckAtomicCaptureStmt(stmt2);
+    // Variable updated in stmt1 should be captured in stmt2
+    auto *v{stmt1Var.typedExpr.get()};
+    auto *e{stmt2Expr.typedExpr.get()};
+    if (v && e && !(v->v == e->v)) {
+      context_.Say(stmt1Var.GetSource(),
+          "Updated variable/array element/derived-type component %s expected to be captured in the second statement of ATOMIC CAPTURE construct"_err_en_US,
+          stmt1Var.GetSource());
+    }
+  } else {
+    context_.Say(stmt1Expr.source,
+        "Invalid ATOMIC CAPTURE construct statements. 
Expected one of [update-stmt, capture-stmt], [capture-stmt, update-stmt], or [capture-stmt, write-stmt]"_err_en_US); + } +} + void OmpStructureChecker::CheckAtomicMemoryOrderClause( const parser::OmpAtomicClauseList *leftHandClauseList, const parser::OmpAtomicClauseList *rightHandClauseList) { @@ -2060,15 +2112,15 @@ void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { atomicWrite.t) .statement); }, - [&](const auto &atomicConstruct) { - const auto &dir{std::get(atomicConstruct.t)}; + [&](const parser::OmpAtomicCapture &atomicCapture) { + const auto &dir{std::get(atomicCapture.t)}; PushContextAndClauseSets( dir.source, llvm::omp::Directive::OMPD_atomic); - CheckAtomicMemoryOrderClause(&std::get<0>(atomicConstruct.t), - &std::get<2>(atomicConstruct.t)); + CheckAtomicMemoryOrderClause( + &std::get<0>(atomicCapture.t), &std::get<2>(atomicCapture.t)); CheckHintClause( - &std::get<0>(atomicConstruct.t), - &std::get<2>(atomicConstruct.t)); + &std::get<0>(atomicCapture.t), &std::get<2>(atomicCapture.t)); + CheckAtomicCaptureConstruct(atomicCapture); }, }, x.u); diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 2cc1a78068f54..8bfd4d594b028 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -193,6 +193,7 @@ class OmpStructureChecker void CheckAtomicUpdateStmt(const parser::AssignmentStmt &); void CheckAtomicCaptureStmt(const parser::AssignmentStmt &); void CheckAtomicWriteStmt(const parser::AssignmentStmt &); + void CheckAtomicCaptureConstruct(const parser::OmpAtomicCapture &); void CheckAtomicConstructStructure(const parser::OpenMPAtomicConstruct &); void CheckDistLinear(const parser::OpenMPLoopConstruct &x); void CheckSIMDNest(const parser::OpenMPConstruct &x); diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index a287fa14a4878..fc48b4343eea9 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -8,12 +8,47 @@ #include "flang/Runtime/CUDA/memory.h" #include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { extern "C" { +void *RTDEF(CUFMemAlloc)( + std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { + void *ptr = nullptr; + if (bytes != 0) { + if (type == kMemTypeDevice) { + CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); + } else if (type == kMemTypeManaged || type == kMemTypeUnified) { + CUDA_REPORT_IF_ERROR( + cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); + } else if (type == kMemTypePinned) { + CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("unsupported memory type"); + } + } + return ptr; +} + +void RTDEF(CUFMemFree)( + void *ptr, unsigned type, const char *sourceFile, int sourceLine) { + if (!ptr) + return; + if (type == kMemTypeDevice || type == kMemTypeManaged || + type == kMemTypeUnified) { + CUDA_REPORT_IF_ERROR(cudaFree(ptr)); + } else if (type == kMemTypePinned) { + CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("unsupported memory type"); + } +} + void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; @@ -21,6 +56,23 @@ void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, "value to a descriptor"); } +void 
RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, + unsigned mode, const char *sourceFile, int sourceLine) { + cudaMemcpyKind kind; + if (mode == kHostToDevice) { + kind = cudaMemcpyHostToDevice; + } else if (mode == kDeviceToHost) { + kind = cudaMemcpyDeviceToHost; + } else if (mode == kDeviceToDevice) { + kind = cudaMemcpyDeviceToDevice; + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("host to host copy not supported"); + } + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); +} + void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; diff --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90 new file mode 100644 index 0000000000000..d02350b4f4198 --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-constructor.f90 @@ -0,0 +1,12 @@ +! RUN: bbc -fcuda -emit-hlfir %s -o - | fir-opt --cuf-add-constructor | FileCheck %s + +program main + real, device :: ahost(10) +end + +! CHECK: llvm.func @_FortranACUFRegisterAllocator() attributes {sym_visibility = "private"} +! CHECK-LABEL: llvm.func internal @__cudaFortranConstructor() { +! CHECK: llvm.call @_FortranACUFRegisterAllocator() : () -> () +! CHECK: llvm.return +! CHECK: } +! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]} diff --git a/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 b/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 index a30c6c6e4a227..1dc033d0ba033 100644 --- a/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 @@ -31,7 +31,7 @@ ! CHECK: %[[VAL_21:.*]]:3 = hlfir.associate %[[VAL_22:.*]](%[[VAL_17]]) {adapt.valuebyref} : (!hlfir.expr<2xf32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) ! CHECK: %[[VAL_23:.*]] = fir.embox %[[VAL_21]]#0(%[[VAL_17]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (!fir.box>) -> !fir.box> -! CHECK: %[[VAL_25:.*]] = fir.call @_QPfoo(%[[VAL_24]]) fastmath : (!fir.box>) -> f32 +! CHECK: %[[VAL_25:.*]] = fir.call @_QPfoo(%[[VAL_24]]) proc_attrs fastmath : (!fir.box>) -> f32 ! CHECK: hlfir.end_associate %[[VAL_21]]#1, %[[VAL_21]]#2 : !fir.ref>, i1 ! CHECK: hlfir.destroy %[[VAL_22]] : !hlfir.expr<2xf32> ! CHECK: hlfir.yield_element %[[VAL_25]] : f32 diff --git a/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 b/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 index 277e2683c64f8..4d3f93c7d48ce 100644 --- a/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 @@ -107,7 +107,7 @@ integer pure function foo(i) ! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_5]], %[[VAL_12]] : index ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32 -! CHECK: %[[VAL_16:.*]] = fir.call @_QPfoo(%[[VAL_15]]) fastmath : (i32) -> i32 +! CHECK: %[[VAL_16:.*]] = fir.call @_QPfoo(%[[VAL_15]]) proc_attrs fastmath : (i32) -> i32 ! CHECK: hlfir.yield_element %[[VAL_16]] : i32 ! CHECK: } ! 
CHECK: %[[VAL_17:.*]]:3 = hlfir.associate %[[VAL_18:.*]](%[[VAL_3]]) {adapt.valuebyref} : (!hlfir.expr<4xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) diff --git a/flang/test/Lower/HLFIR/elemental-array-ops.f90 b/flang/test/Lower/HLFIR/elemental-array-ops.f90 index 18e1fb0a787e7..aefc4d978a27d 100644 --- a/flang/test/Lower/HLFIR/elemental-array-ops.f90 +++ b/flang/test/Lower/HLFIR/elemental-array-ops.f90 @@ -182,7 +182,7 @@ end subroutine char_return ! CHECK: %[[VAL_23:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_22]], %[[VAL_23]] : index ! CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_22]], %[[VAL_23]] : index -! CHECK: %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) fastmath : (!fir.ref>, index, !fir.boxchar<1>) -> !fir.boxchar<1> +! CHECK: %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) proc_attrs fastmath : (!fir.ref>, index, !fir.boxchar<1>) -> !fir.boxchar<1> ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_2]] typeparams %[[VAL_25]] {uniq_name = ".tmp.func_result"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[MustFree:.*]] = arith.constant false ! CHECK: %[[ResultTemp:.*]] = hlfir.as_expr %[[VAL_28]]#0 move %[[MustFree]] : (!fir.ref>, i1) -> !hlfir.expr> diff --git a/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 b/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 index aea23d8d94672..d4d8b858aaeea 100644 --- a/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 +++ b/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 @@ -18,7 +18,7 @@ real elemental function elem(a, b) ! CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<100xf32> { ! CHECK: ^bb0(%[[VAL_7:.*]]: index): ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_7]]) : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[VAL_9:.*]] = fir.call @_QPelem(%[[VAL_2]]#1, %[[VAL_8]]) fastmath : (!fir.ref, !fir.ref) -> f32 +! CHECK: %[[VAL_9:.*]] = fir.call @_QPelem(%[[VAL_2]]#1, %[[VAL_8]]) proc_attrs fastmath : (!fir.ref, !fir.ref) -> f32 ! CHECK: hlfir.yield_element %[[VAL_9]] : f32 ! CHECK: } ! CHECK: fir.call @@ -43,7 +43,7 @@ real elemental function elem_val(a, b) ! CHECK: ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index): ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]], %[[VAL_10]]) : (!fir.ref>, index, index) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref -! CHECK: %[[VAL_13:.*]] = fir.call @_QPelem_val(%[[VAL_7]], %[[VAL_12]]) fastmath : (i32, f32) -> f32 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPelem_val(%[[VAL_7]], %[[VAL_12]]) proc_attrs fastmath : (i32, f32) -> f32 ! CHECK: hlfir.yield_element %[[VAL_13]] : f32 ! CHECK: } ! CHECK: fir.call @@ -67,7 +67,7 @@ real elemental function char_elem(a, b) ! CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<100xf32> { ! CHECK: ^bb0(%[[VAL_10:.*]]: index): ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_10]]) typeparams %[[VAL_4]]#1 : (!fir.box>>, index, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_12:.*]] = fir.call @_QPchar_elem(%[[VAL_3]]#0, %[[VAL_11]]) fastmath : (!fir.boxchar<1>, !fir.boxchar<1>) -> f32 +! CHECK: %[[VAL_12:.*]] = fir.call @_QPchar_elem(%[[VAL_3]]#0, %[[VAL_11]]) proc_attrs fastmath : (!fir.boxchar<1>, !fir.boxchar<1>) -> f32 ! CHECK: hlfir.yield_element %[[VAL_12]] : f32 ! CHECK: } ! CHECK: fir.call @@ -93,7 +93,7 @@ elemental subroutine elem_sub(a, b) ! 
CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_7]] unordered { ! CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_7]] unordered { ! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]], %[[VAL_8]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPelem_sub(%[[VAL_2]]#1, %[[VAL_10]]) fastmath : (!fir.ref, !fir.ref) -> () +! CHECK: fir.call @_QPelem_sub(%[[VAL_2]]#1, %[[VAL_10]]) proc_attrs fastmath : (!fir.ref, !fir.ref) -> () ! CHECK: } ! CHECK: } @@ -116,7 +116,7 @@ impure elemental subroutine impure_elem(a) ! CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_5]] { ! CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_1]] step %[[VAL_5]] { ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]], %[[VAL_6]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPimpure_elem(%[[VAL_8]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPimpure_elem(%[[VAL_8]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: return @@ -141,7 +141,7 @@ elemental subroutine ordered_elem(a) ! CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_5]] { ! CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_1]] step %[[VAL_5]] { ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]], %[[VAL_6]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPordered_elem(%[[VAL_8]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPordered_elem(%[[VAL_8]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: return @@ -174,7 +174,7 @@ impure elemental subroutine impure_elem(a) ! CHECK: fir.do_loop %[[VAL_14:.*]] = %[[VAL_13]] to %[[VAL_2]] step %[[VAL_13]] { ! CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_1]] step %[[VAL_13]] { ! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_15]], %[[VAL_14]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPimpure_elem(%[[VAL_16]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPimpure_elem(%[[VAL_16]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref>, i1 diff --git a/flang/test/Lower/HLFIR/forall.f90 b/flang/test/Lower/HLFIR/forall.f90 index c12f0c6a826b5..709e233746a91 100644 --- a/flang/test/Lower/HLFIR/forall.f90 +++ b/flang/test/Lower/HLFIR/forall.f90 @@ -86,7 +86,7 @@ subroutine test_forall_mask() ! CHECK: } (%[[VAL_9:.*]]: i64) { ! CHECK: %[[VAL_10:.*]] = hlfir.forall_index "i" %[[VAL_9]] : (i64) -> !fir.ref ! CHECK: hlfir.forall_mask { -! CHECK: %[[VAL_11:.*]] = fir.call @_QPpredicate(%[[VAL_10]]) fastmath : (!fir.ref) -> !fir.logical<4> +! CHECK: %[[VAL_11:.*]] = fir.call @_QPpredicate(%[[VAL_10]]) proc_attrs fastmath : (!fir.ref) -> !fir.logical<4> ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 ! CHECK: hlfir.yield %[[VAL_12]] : i1 ! CHECK: } do { @@ -113,8 +113,8 @@ subroutine test_forall_several_indices() ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare {{.*}}Ey ! CHECK: %[[VAL_7:.*]] = fir.call @_QPibar() fastmath : () -> i32 ! CHECK: %[[VAL_8:.*]] = fir.call @_QPifoo() fastmath : () -> i32 -! CHECK: %[[VAL_9:.*]] = fir.call @_QPjfoo() fastmath : () -> i64 -! CHECK: %[[VAL_10:.*]] = fir.call @_QPjbar() fastmath : () -> i64 +! CHECK: %[[VAL_9:.*]] = fir.call @_QPjfoo() proc_attrs fastmath : () -> i64 +! CHECK: %[[VAL_10:.*]] = fir.call @_QPjbar() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.forall lb { ! 
CHECK: hlfir.yield %[[VAL_7]] : i32 ! CHECK: } ub { @@ -126,7 +126,7 @@ subroutine test_forall_several_indices() ! CHECK: hlfir.yield %[[VAL_10]] : i64 ! CHECK: } (%[[VAL_12:.*]]: i64) { ! CHECK: hlfir.region_assign { -! CHECK: %[[VAL_13:.*]] = fir.call @_QPifoo2(%[[VAL_11]], %[[VAL_12]]) fastmath : (i64, i64) -> i64 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPifoo2(%[[VAL_11]], %[[VAL_12]]) proc_attrs fastmath : (i64, i64) -> i64 ! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_13]]) : (!fir.ref>, i64) -> !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref ! CHECK: hlfir.yield %[[VAL_15]] : i32 @@ -169,10 +169,10 @@ subroutine test_nested_foralls() ! CHECK: hlfir.yield %[[VAL_12]] : !fir.ref ! CHECK: } ! CHECK: hlfir.forall lb { -! CHECK: %[[VAL_13:.*]] = fir.call @_QPjfoo() fastmath : () -> i64 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPjfoo() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.yield %[[VAL_13]] : i64 ! CHECK: } ub { -! CHECK: %[[VAL_14:.*]] = fir.call @_QPjbar() fastmath : () -> i64 +! CHECK: %[[VAL_14:.*]] = fir.call @_QPjbar() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.yield %[[VAL_14]] : i64 ! CHECK: } (%[[VAL_15:.*]]: i64) { ! CHECK: hlfir.region_assign { diff --git a/flang/test/Lower/HLFIR/where-nonelemental.f90 b/flang/test/Lower/HLFIR/where-nonelemental.f90 index 15a281b0ba681..643f417c47674 100644 --- a/flang/test/Lower/HLFIR/where-nonelemental.f90 +++ b/flang/test/Lower/HLFIR/where-nonelemental.f90 @@ -125,7 +125,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.where { ! CHECK: %[[VAL_21:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK-NOT: hlfir.exactly_once -! CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> ! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { ! CHECK: llvm.intr.stackrestore %[[VAL_21]] : !llvm.ptr ! CHECK: } @@ -173,7 +173,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.elsewhere mask { ! CHECK: %[[VAL_129:.*]] = hlfir.exactly_once : !hlfir.expr<100x!fir.logical<4>> { ! CHECK: %[[VAL_139:.*]] = llvm.intr.stacksave : !llvm.ptr -! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> ! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { ! CHECK: llvm.intr.stackrestore %[[VAL_139]] : !llvm.ptr ! CHECK: } @@ -185,7 +185,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.yield %{{.*}} : !fir.box> ! CHECK: } to { ! CHECK: %[[VAL_165:.*]] = hlfir.exactly_once : i32 { -! CHECK: %[[VAL_166:.*]] = fir.call @_QPpure_ifoo() fastmath : () -> i32 +! CHECK: %[[VAL_166:.*]] = fir.call @_QPpure_ifoo() proc_attrs fastmath : () -> i32 ! CHECK: hlfir.yield %[[VAL_166]] : i32 ! CHECK: } ! CHECK: hlfir.designate diff --git a/flang/test/Lower/array-elemental-calls-char.f90 b/flang/test/Lower/array-elemental-calls-char.f90 index 652e79232c1b5..603cc677805fc 100644 --- a/flang/test/Lower/array-elemental-calls-char.f90 +++ b/flang/test/Lower/array-elemental-calls-char.f90 @@ -123,7 +123,7 @@ subroutine foo2b(i, j, c) ! CHECK: %[[VAL_13:.*]] = fir.emboxchar %[[VAL_7]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> ! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_9]], %[[VAL_5]] : index ! 
CHECK: %[[VAL_15:.*]] = fir.array_coor %[[VAL_1]](%[[VAL_8]]) %[[VAL_14]] : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref -! CHECK: %[[VAL_16:.*]] = fir.call @_QPelem2(%[[VAL_13]], %[[VAL_15]]) fastmath : (!fir.boxchar<1>, !fir.ref) -> i32 +! CHECK: %[[VAL_16:.*]] = fir.call @_QPelem2(%[[VAL_13]], %[[VAL_15]]) proc_attrs fastmath : (!fir.boxchar<1>, !fir.ref) -> i32 ! CHECK: %[[VAL_17:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_8]]) %[[VAL_14]] : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref ! CHECK: fir.store %[[VAL_16]] to %[[VAL_17]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_10]], %[[VAL_5]] : index diff --git a/flang/test/Lower/array-user-def-assignments.f90 b/flang/test/Lower/array-user-def-assignments.f90 index 97090ff77678c..e88bc2fb861ba 100644 --- a/flang/test/Lower/array-user-def-assignments.f90 +++ b/flang/test/Lower/array-user-def-assignments.f90 @@ -442,7 +442,7 @@ elemental subroutine sto_char(a,b) ! CHECK: %[[V_6:[0-9]+]] = fir.do_loop %arg2 = %[[V_2]] to %[[V_3]] step %[[C_1]] unordered iter_args(%arg3 = %[[V_5]]) -> (!fir.array<10x!fir.logical<4>>) { ! CHECK: %[[V_7:[0-9]+]] = fir.convert %arg2 : (index) -> i32 ! CHECK: fir.store %[[V_7]] to %[[V_1:[0-9]+]] : !fir.ref -! CHECK: %[[V_8:[0-9]+]] = fir.call @_QPreturns_alloc(%[[V_1]]) fastmath : (!fir.ref) -> !fir.box> +! CHECK: %[[V_8:[0-9]+]] = fir.call @_QPreturns_alloc(%[[V_1]]) proc_attrs fastmath : (!fir.ref) -> !fir.box> ! CHECK: fir.save_result %[[V_8]] to %[[V_0:[0-9]+]] : !fir.box>, !fir.ref>> ! CHECK: %[[V_9:[0-9]+]] = fir.load %[[V_0:[0-9]+]] : !fir.ref>> ! CHECK: %[[V_10:[0-9]+]] = fir.box_addr %[[V_9:[0-9]+]] : (!fir.box>) -> !fir.heap diff --git a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 index a346056dee383..0d4da5485af04 100644 --- a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 +++ b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 @@ -84,4 +84,68 @@ program sample !$omp atomic write !ERROR: Expected scalar variable on the LHS of atomic assignment statement a = x + + !$omp atomic capture + v = x + x = x + 1 + !$omp end atomic + + !$omp atomic release capture + v = x + !ERROR: Atomic update statement should be of form `x = x operator expr` OR `x = expr operator x` + x = b + (x*1) + !$omp end atomic + + !$omp atomic capture hint(0) + v = x + x = 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component x expected to be assigned in the second statement of ATOMIC CAPTURE construct + v = x + b = b + 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component x expected to be assigned in the second statement of ATOMIC CAPTURE construct + v = x + b = 10 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component x expected to be captured in the second statement of ATOMIC CAPTURE construct + x = x + 10 + v = b + !$omp end atomic + + !$omp atomic capture + !ERROR: Invalid ATOMIC CAPTURE construct statements. 
Expected one of [update-stmt, capture-stmt], [capture-stmt, update-stmt], or [capture-stmt, write-stmt] + v = 1 + x = 4 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component z%y expected to be assigned in the second statement of ATOMIC CAPTURE construct + x = z%y + z%m = z%m + 1.0 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component z%m expected to be captured in the second statement of ATOMIC CAPTURE construct + z%m = z%m + 1.0 + x = z%y + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component y(2) expected to be assigned in the second statement of ATOMIC CAPTURE construct + x = y(2) + y(1) = y(1) + 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component y(1) expected to be captured in the second statement of ATOMIC CAPTURE construct + y(1) = y(1) + 1 + x = y(2) + !$omp end atomic end program diff --git a/flang/test/Semantics/OpenMP/requires-atomic01.f90 b/flang/test/Semantics/OpenMP/requires-atomic01.f90 index b39c9cdcc0bb3..cb7b1bc1ac52a 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic01.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic01.f90 @@ -88,7 +88,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> SeqCst !$omp atomic capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -96,7 +96,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic relaxed capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -104,6 +104,6 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic capture relaxed i = j - i = j + j = j + 1 !$omp end atomic end program requires diff --git a/flang/test/Semantics/OpenMP/requires-atomic02.f90 b/flang/test/Semantics/OpenMP/requires-atomic02.f90 index 3af83970e7927..5a4249794f7b5 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic02.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic02.f90 @@ -88,7 +88,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> AcqRel !$omp atomic capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -96,7 +96,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic relaxed capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -104,6 +104,6 @@ program requires ! 
CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed
 !$omp atomic capture relaxed
 i = j
-i = j
+j = j + 1
 !$omp end atomic
 end program requires
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
index 30fb8c220233c..a7fe604d687bd 100644
--- a/flang/unittests/Runtime/CUDA/CMakeLists.txt
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME)
 add_flang_unittest(FlangCufRuntimeTests
   Allocatable.cpp
   AllocatorCUF.cpp
+  Memory.cpp
 )
 
 if (BUILD_SHARED_LIBS)
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
new file mode 100644
index 0000000000000..157d3cdb531de
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -0,0 +1,31 @@
+//===-- flang/unittests/Runtime/CUDA/Memory.cpp -------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/memory.h"
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
+
+using namespace Fortran::runtime::cuda;
+
+TEST(MemoryCUFTest, SimpleAllocTransferFree) {
+  int *dev = (int *)RTNAME(CUFMemAlloc)(
+      sizeof(int), kMemTypeDevice, __FILE__, __LINE__);
+  EXPECT_TRUE(dev != 0);
+  int host = 42;
+  RTNAME(CUFDataTransferPtrPtr)
+  ((void *)dev, (void *)&host, sizeof(int), kHostToDevice, __FILE__, __LINE__);
+  host = 0;
+  RTNAME(CUFDataTransferPtrPtr)
+  ((void *)&host, (void *)dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__);
+  EXPECT_EQ(42, host);
+  RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
+}
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 9fb89e6fd8d28..b4cfe47f4505f 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -240,6 +240,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.putchar
     libc.src.stdio.puts
    libc.src.stdio.remove
+    libc.src.stdio.rename
     libc.src.stdio.stderr
     libc.src.stdio.stdin
     libc.src.stdio.stdout
diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst
index 44c21c7b4c1ff..9c151a5fbac1f 100644
--- a/libc/docs/gpu/support.rst
+++ b/libc/docs/gpu/support.rst
@@ -240,6 +240,7 @@ fputs |check| |check|
 fputc |check| |check|
 fwrite |check| |check|
 remove |check| |check|
+rename |check| |check|
 putc |check| |check|
 printf |check| |check|
 vprintf |check| |check|
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 3b388de6888c5..1a6c0cd9bc4a1 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -38,6 +38,7 @@ typedef enum {
   RPC_PRINTF_TO_STDERR_PACKED,
   RPC_PRINTF_TO_STREAM_PACKED,
   RPC_REMOVE,
+  RPC_RENAME,
   RPC_SYSTEM,
   RPC_LAST = 0xFFFF,
 } rpc_opcode_t;
diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt
index 86470b8425e95..9cac42ed71fb7 100644
--- a/libc/src/stdio/gpu/CMakeLists.txt
+++ b/libc/src/stdio/gpu/CMakeLists.txt
@@ -294,6 +294,17 @@ add_entrypoint_object(
     .vfprintf_utils
 )
 
+add_entrypoint_object(
+  rename
+  SRCS
+    rename.cpp
+  HDRS
+    ../rename.h
+  DEPENDS
+    libc.hdr.types.FILE
+    .gpu_file
+)
+
 add_entrypoint_object(
   stdin
   SRCS
diff --git a/libc/src/stdio/gpu/rename.cpp b/libc/src/stdio/gpu/rename.cpp
new file mode 100644
index 0000000000000..1087228835842
--- /dev/null
+++ b/libc/src/stdio/gpu/rename.cpp
@@ -0,0 +1,30 @@
+//===-- GPU Implementation of rename --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/rename.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/gpu/file.h"
+
+#include "hdr/types/FILE.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, rename, (const char *oldpath, const char *newpath)) {
+  int ret;
+  rpc::Client::Port port = rpc::client.open<RPC_RENAME>();
+  port.send_n(oldpath, internal::string_length(oldpath) + 1);
+  port.send_n(newpath, internal::string_length(newpath) + 1);
+  port.recv(
+      [&](rpc::Buffer *buffer) { ret = static_cast<int>(buffer->data[0]); });
+  port.close();
+
+  return ret;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/sys/socket/linux/CMakeLists.txt b/libc/src/sys/socket/linux/CMakeLists.txt
index f21679b5f8d3c..e1226aaad381f 100644
--- a/libc/src/sys/socket/linux/CMakeLists.txt
+++ b/libc/src/sys/socket/linux/CMakeLists.txt
@@ -33,6 +33,7 @@ add_entrypoint_object(
   DEPENDS
     libc.include.sys_syscall
     libc.include.sys_socket
+    libc.src.__support.macros.sanitizer
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -87,6 +88,7 @@ add_entrypoint_object(
     libc.include.sys_syscall
     libc.hdr.types.struct_sockaddr
     libc.hdr.types.socklen_t
+    libc.src.__support.macros.sanitizer
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -101,6 +103,7 @@ add_entrypoint_object(
     libc.include.sys_syscall
     libc.hdr.types.struct_sockaddr
     libc.hdr.types.socklen_t
+    libc.src.__support.macros.sanitizer
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -114,6 +117,7 @@ add_entrypoint_object(
   DEPENDS
     libc.include.sys_syscall
     libc.hdr.types.struct_msghdr
+    libc.src.__support.macros.sanitizer
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
diff --git a/libc/src/sys/socket/linux/recv.cpp b/libc/src/sys/socket/linux/recv.cpp
index 96acf449dc4bf..55a766aec0e77 100644
--- a/libc/src/sys/socket/linux/recv.cpp
+++ b/libc/src/sys/socket/linux/recv.cpp
@@ -13,6 +13,7 @@
 #include "hdr/types/struct_sockaddr.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
+#include "src/__support/macros/sanitizer.h"
 #include "src/errno/libc_errno.h"
 #include <linux/net.h>   // For SYS_SOCKET socketcall number.
 #include <sys/syscall.h> // For syscall numbers.
@@ -41,6 +42,9 @@ LLVM_LIBC_FUNCTION(ssize_t, recv,
     libc_errno = static_cast<int>(-ret);
     return -1;
   }
+
+  MSAN_UNPOISON(buf, ret);
+
   return ret;
 }
diff --git a/libc/src/sys/socket/linux/recvfrom.cpp b/libc/src/sys/socket/linux/recvfrom.cpp
index 17489a99c922d..990e58da3c1b6 100644
--- a/libc/src/sys/socket/linux/recvfrom.cpp
+++ b/libc/src/sys/socket/linux/recvfrom.cpp
@@ -13,6 +13,7 @@
 #include "hdr/types/struct_sockaddr.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
+#include "src/__support/macros/sanitizer.h"
 #include "src/errno/libc_errno.h"
 #include <linux/net.h>   // For SYS_SOCKET socketcall number.
 #include <sys/syscall.h> // For syscall numbers.
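The MSAN_UNPOISON additions across these socket entrypoints all follow one rule: the kernel fills the buffer behind MemorySanitizer's back, so the bytes it wrote must be explicitly marked initialized or the caller's first read reports a false positive. A hedged sketch of the idea against the public MSan interface (MSAN_UNPOISON is llvm-libc's internal macro; __msan_unpoison is the documented entry point it reduces to under MSan; recv_and_unpoison is a name invented for illustration):

```cpp
#include <cstddef>
#include <sys/socket.h>
#include <sys/types.h>

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define UNPOISON(ptr, size) __msan_unpoison(ptr, size)
#endif
#endif
#ifndef UNPOISON
#define UNPOISON(ptr, size) // no-op when not built with MSan
#endif

// Mark only the bytes the kernel actually wrote as initialized.
ssize_t recv_and_unpoison(int fd, void *buf, size_t len, int flags) {
  ssize_t ret = recv(fd, buf, len, flags);
  if (ret > 0)
    UNPOISON(buf, static_cast<size_t>(ret));
  return ret;
}
```

recvmsg is the subtle case: msg_name, msg_control, and every iovec base pointer are separate kernel-written regions, so each needs its own unpoison call, indexed per iovec.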
@@ -43,6 +44,9 @@ LLVM_LIBC_FUNCTION(ssize_t, recvfrom, libc_errno = static_cast<int>(-ret); return -1; } + + MSAN_UNPOISON(buf, ret); + return ret; } diff --git a/libc/src/sys/socket/linux/recvmsg.cpp b/libc/src/sys/socket/linux/recvmsg.cpp index 60045d6a80f53..f44e5800d817f 100644 --- a/libc/src/sys/socket/linux/recvmsg.cpp +++ b/libc/src/sys/socket/linux/recvmsg.cpp @@ -12,6 +12,7 @@ #include "hdr/types/struct_msghdr.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "src/__support/macros/sanitizer.h" #include "src/errno/libc_errno.h" #include <linux/net.h> // For SYS_SOCKET socketcall number. #include <sys/syscall.h> // For syscall numbers. @@ -36,6 +37,14 @@ LLVM_LIBC_FUNCTION(ssize_t, recvmsg, libc_errno = static_cast<int>(-ret); return -1; } + + // Unpoison the msghdr, as well as all its components (note the per-iovec + // indexing; unpoisoning msg->msg_iov->iov_base would only cover the first + // buffer). + MSAN_UNPOISON(msg->msg_name, msg->msg_namelen); + for (size_t i = 0; i < msg->msg_iovlen; ++i) { + MSAN_UNPOISON(msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); + } + MSAN_UNPOISON(msg->msg_control, msg->msg_controllen); + return ret; } diff --git a/libc/src/sys/socket/linux/socketpair.cpp b/libc/src/sys/socket/linux/socketpair.cpp index d459a74433906..60612ac04d613 100644 --- a/libc/src/sys/socket/linux/socketpair.cpp +++ b/libc/src/sys/socket/linux/socketpair.cpp @@ -10,10 +10,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" - #include "src/__support/macros/config.h" +#include "src/__support/macros/sanitizer.h" #include "src/errno/libc_errno.h" - #include <linux/net.h> // For SYS_SOCKET socketcall number. #include <sys/syscall.h> // For syscall numbers. @@ -37,6 +36,9 @@ LLVM_LIBC_FUNCTION(int, socketpair, libc_errno = -ret; return -1; } + + MSAN_UNPOISON(sv, sizeof(int) * 2); + return ret; } diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 8708f946b310e..aa65dfe69c385 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -392,6 +392,24 @@ rpc_status_t handle_server_impl( }); break; } + case RPC_RENAME: { + uint64_t oldsizes[lane_size] = {0}; + uint64_t newsizes[lane_size] = {0}; + void *oldpath[lane_size] = {nullptr}; + void *newpath[lane_size] = {nullptr}; + port->recv_n(oldpath, oldsizes, + [&](uint64_t size) { return new char[size]; }); + port->recv_n(newpath, newsizes, + [&](uint64_t size) { return new char[size]; }); + port->send([&](rpc::Buffer *buffer, uint32_t id) { + buffer->data[0] = static_cast<uint64_t>( + rename(reinterpret_cast<const char *>(oldpath[id]), + reinterpret_cast<const char *>(newpath[id]))); + delete[] reinterpret_cast<char *>(oldpath[id]); + delete[] reinterpret_cast<char *>(newpath[id]); + }); + break; + } case RPC_SYSTEM: { uint64_t sizes[lane_size] = {0}; void *args[lane_size] = {nullptr}; diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index 5de2d44994ad0..0e1d3506a9973 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -1,3 +1,5 @@ +# TODO: Re-enable the tests once the CI is back under control +return() # The find_package changes these variables. This leaves the build in an odd # state.
Calling cmake a second time tries to write site config information in diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml index 795e0dc98610d..c32e016edeb15 100644 --- a/libcxx/utils/ci/docker-compose.yml +++ b/libcxx/utils/ci/docker-compose.yml @@ -21,7 +21,7 @@ services: dockerfile: Dockerfile target: actions-builder args: - BASE_IMAGE: ghcr.io/actions/actions-runner:2.317.0 + BASE_IMAGE: ghcr.io/actions/actions-runner:2.319.1 <<: *compiler_versions android-buildkite-builder: image: ghcr.io/libcxx/android-buildkite-builder:${TAG:-latest} diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 723bdfe324b14..501d0b6fdfcd1 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -2632,7 +2632,8 @@ template struct NodeKind; #include "ItaniumNodes.def" inline bool NodeArray::printAsString(OutputBuffer &OB) const { - auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + auto StartPos = OB.getCurrentPosition(); + auto Fail = [&OB, StartPos] { OB.setCurrentPosition(StartPos); return false; }; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index f66fe3cab5a2f..5a6a4a61030e6 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Parallel.h" @@ -704,6 +705,24 @@ Symbol *LinkerDriver::addUndefined(StringRef name) { return b; } +void LinkerDriver::addUndefinedGlob(StringRef arg) { + Expected<GlobPattern> pat = GlobPattern::create(arg); + if (!pat) { + error("/includeglob: " + toString(pat.takeError())); + return; + } + + SmallVector<Symbol *, 0> syms; + ctx.symtab.forEachSymbol([&syms, &pat](Symbol *sym) { + if (pat->match(sym->getName())) { + syms.push_back(sym); + } + }); + + for (Symbol *sym : syms) + addUndefined(sym->getName()); +} + StringRef LinkerDriver::mangleMaybe(Symbol *s) { // If the plain symbol name has already been resolved, do nothing. Undefined *unmangled = dyn_cast<Undefined>(s); @@ -2524,6 +2543,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { } while (run()); } + // Handle /includeglob + for (StringRef pat : args::getStrings(args, OPT_incl_glob)) + addUndefinedGlob(pat); + // Create wrapped symbols for -wrap option. std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args); // Load more object files that might be needed for wrapped symbols. diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h index 0c195a7cc3148..58a2ed2310624 100644 --- a/lld/COFF/Driver.h +++ b/lld/COFF/Driver.h @@ -172,6 +172,8 @@ class LinkerDriver { Symbol *addUndefined(StringRef sym); + void addUndefinedGlob(StringRef arg); + StringRef mangleMaybe(Symbol *s); // Windows specific -- "main" is not the only main function in Windows.
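Editorial note: since /includeglob is built directly on llvm::GlobPattern, it may help to see that utility in isolation. The following standalone sketch mirrors the matching logic of addUndefinedGlob above; it is illustrative only, the symbol names are hypothetical stand-ins for what lld's symbol table would supply, and error handling is reduced to the same create-then-report shape used in the diff.

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>

int main() {
  // Compile the glob once; creation fails on a malformed pattern, which is
  // why addUndefinedGlob reports pat.takeError() instead of asserting.
  llvm::Expected<llvm::GlobPattern> pat = llvm::GlobPattern::create("glob_*");
  if (!pat) {
    llvm::logAllUnhandledErrors(pat.takeError(), llvm::errs(), "/includeglob: ");
    return 1;
  }
  // Hypothetical symbol names; lld would iterate its real symbol table here.
  llvm::SmallVector<llvm::StringRef, 4> syms = {"main", "glob_match1",
                                                "glob_match2", "unused"};
  for (llvm::StringRef sym : syms)
    if (pat->match(sym)) // glob matched: the symbol would be force-included
      std::printf("would force-include: %s\n", sym.str().c_str());
}

Note the two-phase structure in the driver code above: matching symbols are collected first and only then passed to addUndefined(), since adding undefined symbols can mutate the very symbol table being iterated.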
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index 4bc4d7c4b5a47..7ceb824c068de 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -310,6 +310,9 @@ defm build_id: B< "Generate build ID (always on when generating PDB)", "Do not Generate build ID">; +def incl_glob : Joined<["/", "-", "/?", "-?"], "includeglob:">, + HelpText<"Force each symbol matching the glob pattern to be added to the symbol table as undefined">; + // Flags for debugging def lldmap : F<"lldmap">; def lldmap_file : P_priv<"lldmap">; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 343fc4989fa4c..dcdd74ac74f5f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -850,7 +850,7 @@ static ICFLevel getICF(opt::InputArgList &args) { return ICFLevel::All; } -static StripPolicy getStrip(opt::InputArgList &args) { +static StripPolicy getStrip(Ctx &ctx, opt::InputArgList &args) { if (args.hasArg(OPT_relocatable)) return StripPolicy::None; if (!ctx.arg.zSectionHeader) @@ -953,7 +953,7 @@ static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &args) { return {false, false}; } -static void readCallGraph(MemoryBufferRef mb) { +static void readCallGraph(Ctx &ctx, MemoryBufferRef mb) { // Build a map from symbol name to section DenseMap<StringRef, InputSectionBase *> map; for (ELFFileBase *file : ctx.objectFiles) @@ -1041,7 +1041,7 @@ processCallGraphRelocations(SmallVector<uint32_t, 32> &symbolIndices, return !symbolIndices.empty(); } -template <class ELFT> static void readCallGraphsFromObjectFiles() { +template <class ELFT> static void readCallGraphsFromObjectFiles(Ctx &ctx) { SmallVector<uint32_t, 32> symbolIndices; ArrayRef<typename ELFT::CGProfile> cgProfile; for (auto file : ctx.objectFiles) { @@ -1070,7 +1070,8 @@ } template <class ELFT> -static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) { +static void ltoValidateAllVtablesHaveTypeInfos(Ctx &ctx, + opt::InputArgList &args) { DenseSet<StringRef> typeInfoSymbols; SmallSetVector<StringRef, 0> vtableSymbols; auto processVtableAndTypeInfoSymbols = [&](StringRef name) { @@ -1184,7 +1185,8 @@ getOldNewOptionsExtra(opt::InputArgList &args, unsigned id) { } // Parse the symbol ordering file and warn for any duplicate entries. -static SmallVector<StringRef, 0> getSymbolOrderingFile(MemoryBufferRef mb) { +static SmallVector<StringRef, 0> getSymbolOrderingFile(Ctx &ctx, + MemoryBufferRef mb) { SetVector<StringRef, SmallVector<StringRef, 0>> names; for (StringRef s : args::getLines(mb)) if (!names.insert(s) && ctx.arg.warnSymbolOrdering) @@ -1193,7 +1195,7 @@ static SmallVector<StringRef, 0> getSymbolOrderingFile(MemoryBufferRef mb) { return names.takeVector(); } -static bool getIsRela(opt::InputArgList &args) { +static bool getIsRela(Ctx &ctx, opt::InputArgList &args) { // The psABI specifies the default relocation entry format. bool rela = is_contained({EM_AARCH64, EM_AMDGPU, EM_HEXAGON, EM_LOONGARCH, EM_PPC, EM_PPC64, EM_RISCV, EM_S390, EM_X86_64}, @@ -1212,7 +1214,7 @@ static bool getIsRela(opt::InputArgList &args) { return rela; } -static void parseClangOption(StringRef opt, const Twine &msg) { +static void parseClangOption(Ctx &ctx, StringRef opt, const Twine &msg) { std::string err; raw_string_ostream os(err); @@ -1228,7 +1230,7 @@ static bool isValidReportString(StringRef arg) { } // Process a remap pattern 'from-glob=to-file'.
-static bool remapInputs(StringRef line, const Twine &location) { +static bool remapInputs(Ctx &ctx, StringRef line, const Twine &location) { SmallVector fields; line.split(fields, '='); if (fields.size() != 2 || fields[1].empty()) { @@ -1440,7 +1442,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { args::getInteger(args, OPT_split_stack_adjust_size, 16384); ctx.arg.zSectionHeader = getZFlag(args, "sectionheader", "nosectionheader", true); - ctx.arg.strip = getStrip(args); // needs zSectionHeader + ctx.arg.strip = getStrip(ctx, args); // needs zSectionHeader ctx.arg.sysroot = args.getLastArgValue(OPT_sysroot); ctx.arg.target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); ctx.arg.target2 = getTarget2(args); @@ -1535,7 +1537,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { for (opt::Arg *arg : args.filtered(OPT_remap_inputs)) { StringRef value(arg->getValue()); - remapInputs(value, arg->getSpelling()); + remapInputs(ctx, value, arg->getSpelling()); } for (opt::Arg *arg : args.filtered(OPT_remap_inputs_file)) { StringRef filename(arg->getValue()); @@ -1544,7 +1546,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { continue; // Parse 'from-glob=to-file' lines, ignoring #-led comments. for (auto [lineno, line] : llvm::enumerate(args::getLines(*buffer))) - if (remapInputs(line, filename + ":" + Twine(lineno + 1))) + if (remapInputs(ctx, line, filename + ":" + Twine(lineno + 1))) break; } @@ -1637,11 +1639,12 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { // Parse LTO options. if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq)) - parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())), + parseClangOption(ctx, saver().save("-mcpu=" + StringRef(arg->getValue())), arg->getSpelling()); for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus)) - parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling()); + parseClangOption(ctx, std::string("-") + arg->getValue(), + arg->getSpelling()); // GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or // relative path. Just ignore. If not ended with "lto-wrapper" (or @@ -1658,7 +1661,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { // Parse -mllvm options. for (const auto *arg : args.filtered(OPT_mllvm)) { - parseClangOption(arg->getValue(), arg->getSpelling()); + parseClangOption(ctx, arg->getValue(), arg->getSpelling()); ctx.arg.mllvmOpts.emplace_back(arg->getValue()); } @@ -1758,7 +1761,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { error("--symbol-ordering-file and --call-graph-order-file " "may not be used together"); if (std::optional buffer = readFile(arg->getValue())) { - ctx.arg.symbolOrderingFile = getSymbolOrderingFile(*buffer); + ctx.arg.symbolOrderingFile = getSymbolOrderingFile(ctx, *buffer); // Also need to disable CallGraphProfileSort to prevent // LLD order symbols with CGProfile ctx.arg.callGraphProfileSort = CGProfileSortKind::None; @@ -1851,7 +1854,7 @@ static void setConfigs(Ctx &ctx, opt::InputArgList &args) { // We pick the format for dynamic relocations according to the psABI for each // processor, but a contrary choice can be made if the dynamic loader // supports. - ctx.arg.isRela = getIsRela(args); + ctx.arg.isRela = getIsRela(ctx, args); // If the output uses REL relocations we must store the dynamic relocation // addends to the output sections. 
We also store addends for RELA relocations @@ -2146,7 +2149,7 @@ static DenseSet<StringRef> getExcludeLibs(opt::InputArgList &args) { // A special library name "ALL" means all archive files. // // This is not a popular option, but some programs such as bionic libc use it. -static void excludeLibs(opt::InputArgList &args) { +static void excludeLibs(Ctx &ctx, opt::InputArgList &args) { DenseSet<StringRef> libs = getExcludeLibs(args); bool all = libs.count("ALL"); @@ -2441,7 +2444,7 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { // are used to control which partition a symbol is allocated to. See // https://lld.llvm.org/Partitions.html for more details on partitions. template <class ELFT> -static void readSymbolPartitionSection(InputSectionBase *s) { +static void readSymbolPartitionSection(Ctx &ctx, InputSectionBase *s) { // Read the relocation that refers to the partition's entry point symbol. Symbol *sym; const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>(); @@ -2961,7 +2964,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // 'has undefined version' error in -shared --exclude-libs=ALL mode (PR36295). // GNU ld errors in this case. if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); + excludeLibs(ctx, args); // Create elfHeader early. We need a dummy section in // addReservedSymbols to mark the created symbols as not absolute. @@ -2994,7 +2997,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Handle --lto-validate-all-vtables-have-type-infos. if (ctx.arg.ltoValidateAllVtablesHaveTypeInfos) - ltoValidateAllVtablesHaveTypeInfos<ELFT>(args); + ltoValidateAllVtablesHaveTypeInfos<ELFT>(ctx, args); // Do link-time optimization if given files are LLVM bitcode files. // This compiles bitcode files into real object files. @@ -3045,7 +3048,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // libcalls symbols defined in an excluded archive. This may override // versionId set by scanVersionScript(). if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); + excludeLibs(ctx, args); // Record [__acle_se_<sym>, <sym>] pairs for later processing. processArmCmseSymbols(); @@ -3079,10 +3082,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { { llvm::TimeTraceScope timeScope("Strip sections"); if (ctx.hasSympart.load(std::memory_order_relaxed)) { - llvm::erase_if(ctx.inputSections, [](InputSectionBase *s) { + llvm::erase_if(ctx.inputSections, [&ctx = ctx](InputSectionBase *s) { if (s->type != SHT_LLVM_SYMPART) return false; - readSymbolPartitionSection<ELFT>(s); + readSymbolPartitionSection<ELFT>(ctx, s); return true; }); } @@ -3204,8 +3207,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { if (ctx.arg.callGraphProfileSort != CGProfileSortKind::None) { if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) - readCallGraph(*buffer); - readCallGraphsFromObjectFiles<ELFT>(); + readCallGraph(ctx, *buffer); + readCallGraphsFromObjectFiles<ELFT>(ctx); } // Write the result to the file.
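Editorial note: the ELF driver hunks above are one mechanical refactor, threading Ctx & through helpers that previously read a global context, so the dependency becomes visible in each signature (and, for lambdas, explicit via captures like [&ctx = ctx]). A minimal sketch of the pattern, using illustrative stand-in types rather than lld's real ones:

// Stand-in for lld/ELF's Ctx; only the fields needed for the sketch.
struct Ctx {
  struct {
    bool relocatable = false;
    bool warnSymbolOrdering = true;
  } arg;
};

// Before: `static bool getStrip(opt::InputArgList &)` consulted a global ctx.
// After: the context is an explicit parameter, so data flow is visible and
// the helper works with more than one linker context (e.g. in-process reuse).
static bool getStrip(Ctx &ctx) { return !ctx.arg.relocatable; }

int main() {
  Ctx ctx;
  ctx.arg.relocatable = true;
  // Lambdas that used the global now capture the context explicitly, as the
  // [&ctx = ctx] capture in the diff does.
  auto warn = [&ctx = ctx]() { return ctx.arg.warnSymbolOrdering; };
  bool strip = getStrip(ctx); // callers pass ctx through explicitly
  return (strip || warn()) ? 0 : 1;
}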
diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index c7d7b9cfca386..553698d4f537f 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -514,6 +514,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, for (auto *a : args.filtered(OPT_require_defined)) add("-include:" + StringRef(a->getValue())); + for (auto *a : args.filtered(OPT_undefined_glob)) + add("-includeglob:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_undefined)) add("-includeoptional:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_delayload)) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 7bd5fb80749da..ff7e21fa808f3 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -139,6 +139,8 @@ defm threads defm tsaware: B_disable<"tsaware", "Set the 'Terminal Server aware' flag", "Don't set the 'Terminal Server aware' flag">; defm undefined: Eq<"undefined", "Include symbol in the link, if available">; +defm undefined_glob: EEq<"undefined-glob", "Force undefined symbol during linking">, + MetaVarName<"<pattern>">; defm whole_archive: B<"whole-archive", "Include all object files for following archives", "No longer include all object files for following archives">; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 6d09de10e7195..da93da9196af7 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -41,9 +41,11 @@ Breaking changes COFF Improvements ----------------- +* ``/includeglob`` has been implemented to match the behavior of ``--undefined-glob`` available for ELF. MinGW Improvements ------------------ +* ``--undefined-glob`` is now supported by translating into the ``/includeglob`` flag. MachO Improvements ------------------ diff --git a/lld/test/COFF/Inputs/include1d.yaml b/lld/test/COFF/Inputs/include1d.yaml new file mode 100644 index 0000000000000..d315cc885dd7c --- /dev/null +++ b/lld/test/COFF/Inputs/include1d.yaml @@ -0,0 +1,29 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: B800000000506800000000680000000050E80000000050E800000000 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 28 + NumberOfRelocations: 4 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: baz + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL ...
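Editorial note tying the two halves of this change together: the MinGW frontend simply rewrites each GNU-style --undefined-glob=<pattern> into the COFF driver's -includeglob:<pattern> before handing off, as the driver.test expectations further below confirm. A rough standalone sketch of that translation, using plain strings as a hypothetical stand-in for lld's option table:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Hypothetical GNU-style command-line fragment.
  std::vector<std::string> gnuArgs = {"--undefined-glob=_foo*", "-lfoo"};
  std::vector<std::string> coffArgs;
  const std::string prefix = "--undefined-glob=";
  for (const std::string &a : gnuArgs)
    if (a.compare(0, prefix.size(), prefix) == 0)
      // Keep the pattern verbatim; glob expansion happens later, inside the
      // COFF driver's addUndefinedGlob().
      coffArgs.push_back("-includeglob:" + a.substr(prefix.size()));
  for (const std::string &a : coffArgs)
    std::cout << a << '\n'; // prints "-includeglob:_foo*"
}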
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s index cb79b5c257e6e..80ec893869e6f 100644 --- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s +++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s @@ -42,12 +42,6 @@ __os_arm64x_check_icall_cfg: .xword 0 __os_arm64x_dispatch_fptr: .xword 0 -__os_arm64x_helper0: - .xword 0 -__os_arm64x_helper1: - .xword 0 -__os_arm64x_helper2: - .xword 0 __os_arm64x_helper3: .xword 0 __os_arm64x_helper4: @@ -65,7 +59,7 @@ __os_arm64x_helper8: .globl __chpe_metadata .p2align 3, 0 __chpe_metadata: - .word 1 + .word 2 .rva __hybrid_code_map .word __hybrid_code_map_count .rva __x64_code_ranges_to_entry_points @@ -85,9 +79,9 @@ __chpe_metadata: .word __arm64x_extra_rfe_table_size .rva __os_arm64x_dispatch_fptr .rva __hybrid_auxiliary_iat_copy - .rva __os_arm64x_helper0 - .rva __os_arm64x_helper1 - .rva __os_arm64x_helper2 + .word 0 // __hybrid_auxiliary_delayload_iat + .word 0 // __hybrid_auxiliary_delayload_iat_copy + .word 0 // __hybrid_image_info_bitfield .rva __os_arm64x_helper3 .rva __os_arm64x_helper4 .rva __os_arm64x_helper5 diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test index 08ff31ce1a8f3..9cf0914322941 100644 --- a/lld/test/COFF/arm64ec-import.test +++ b/lld/test/COFF/arm64ec-import.test @@ -27,7 +27,7 @@ RUN: llvm-readobj --coff-imports out2.dll | FileCheck --check-prefix=IMPORTS %s RUN: llvm-readobj --coff-imports out3.dll | FileCheck -check-prefix=IMPORTS %s IMPORTS: Import { IMPORTS-NEXT: Name: test.dll -IMPORTS-NEXT: ImportLookupTableRVA: 0x4230 +IMPORTS-NEXT: ImportLookupTableRVA: 0x4218 IMPORTS-NEXT: ImportAddressTableRVA: 0x3000 IMPORTS-NEXT: Symbol: data (0) IMPORTS-NEXT: Symbol: func (0) @@ -79,13 +79,13 @@ RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC % TESTSEC: 0x180007000 08500000 00300000 10500000 20500000 TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000 TESTSEC-NEXT: 0x180007020 14100000 28100000 00200000 08100000 -TESTSEC-NEXT: 0x180007030 3c100000 a0420000 +TESTSEC-NEXT: 0x180007030 3c100000 88420000 RUN: llvm-readobj --hex-dump=.test out3.dll | FileCheck -check-prefix=TESTSEC-X64 %s TESTSEC-X64: 0x180007000 08300000 00300000 10300000 20300000 TESTSEC-X64-NEXT: 0x180007010 08300000 00500000 10300000 20300000 TESTSEC-X64-NEXT: 0x180007020 14100000 28100000 00200000 08100000 -TESTSEC-X64-NEXT: 0x180007030 3c100000 a0420000 +TESTSEC-X64-NEXT: 0x180007030 3c100000 88420000 RUN: FileCheck --check-prefix=MAP %s < out.map RUN: FileCheck --check-prefix=MAP %s < out2.map @@ -100,10 +100,10 @@ MAP-NEXT: 0002:00000000 __imp_data 0000000180003000 te MAP-NEXT: 0002:00000008 __imp_aux_func 0000000180003008 test{{.*}}:test.dll MAP-NEXT: 0002:00000010 __imp_aux_func2 0000000180003010 test{{.*}}:test.dll MAP-NEXT: 0002:00000020 __imp_aux_t2func 0000000180003020 test2{{.*}}:test2.dll -MAP: 0002:00001298 __auximpcopy_data 0000000180004298 test{{.*}}:test.dll -MAP-NEXT: 0002:000012a0 __auximpcopy_func 00000001800042a0 test{{.*}}:test.dll -MAP-NEXT: 0002:000012a8 __auximpcopy_func2 00000001800042a8 test{{.*}}:test.dll -MAP-NEXT: 0002:000012b8 __auximpcopy_t2func 00000001800042b8 test2{{.*}}:test2.dll +MAP: 0002:00001280 __auximpcopy_data 0000000180004280 test{{.*}}:test.dll +MAP-NEXT: 0002:00001288 __auximpcopy_func 0000000180004288 test{{.*}}:test.dll +MAP-NEXT: 0002:00001290 __auximpcopy_func2 0000000180004290 test{{.*}}:test.dll +MAP-NEXT: 0002:000012a0 __auximpcopy_t2func 00000001800042a0 test2{{.*}}:test2.dll MAP: 0002:00002000 
__imp_aux_data 0000000180005000 test{{.*}}:test.dll MAP-NEXT: 0002:00002008 __imp_func 0000000180005008 test{{.*}}:test.dll MAP-NEXT: 0002:00002010 __imp_func2 0000000180005010 test{{.*}}:test.dll @@ -120,15 +120,14 @@ RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFI RUN: llvm-readobj --coff-load-config out2.dll | FileCheck -check-prefix=LOADCONFIG %s RUN: llvm-readobj --coff-load-config out3.dll | FileCheck -check-prefix=LOADCONFIG %s LOADCONFIG: AuxiliaryIAT: 0x5000 -LOADCONFIG: AuxiliaryIATCopy: 0x4298 +LOADCONFIG: AuxiliaryIATCopy: 0x4280 RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s RUN: llvm-readobj --hex-dump=.rdata out2.dll | FileCheck -check-prefix=RDATA %s RUN: llvm-readobj --hex-dump=.rdata out3.dll | FileCheck -check-prefix=RDATA %s -RDATA: 0x180004290 2e646c6c 00000000 00000000 00000000 -RDATA-NEXT: 0x1800042a0 14100080 01000000 28100080 01000000 -RDATA-NEXT: 0x1800042b0 00000000 00000000 48100080 01000000 -RDATA-NEXT: 0x1800042c0 00000000 00000000 00000000 00000000 +RDATA: 0x180004280 00000000 00000000 14100080 01000000 +RDATA-NEXT: 0x180004290 28100080 01000000 00000000 00000000 +RDATA-NEXT: 0x1800042a0 48100080 01000000 00000000 00000000 RDATA: 0x180005000 00000000 00000000 14100080 01000000 RDATA-NEXT: 0x180005010 28100080 01000000 00000000 00000000 RDATA-NEXT: 0x180005020 48100080 01000000 00000000 00000000 @@ -138,15 +137,15 @@ RUN: llvm-readobj --coff-basereloc out2.dll | FileCheck -check-prefix=BASERELOC RUN: llvm-readobj --coff-basereloc out3.dll | FileCheck -check-prefix=BASERELOC %s BASERELOC: BaseReloc [ Aux IAT copy: -BASERELOC: Address: 0x42A0 +BASERELOC: Address: 0x4288 BASERELOC-NEXT: } BASERELOC-NEXT: Entry { BASERELOC-NEXT: Type: DIR64 -BASERELOC-NEXT: Address: 0x42A8 +BASERELOC-NEXT: Address: 0x4290 BASERELOC-NEXT: } BASERELOC-NEXT: Entry { BASERELOC-NEXT: Type: DIR64 -BASERELOC-NEXT: Address: 0x42B8 +BASERELOC-NEXT: Address: 0x42A0 BASERELOC-NEXT: } Aux IAT: BASERELOC-NOT: Address: 0x5000 diff --git a/lld/test/COFF/include.test b/lld/test/COFF/include.test index 8879ee5bd7a61..2a8a8fe4034c4 100644 --- a/lld/test/COFF/include.test +++ b/lld/test/COFF/include.test @@ -9,10 +9,18 @@ # RUN: echo dummy >> %t.log # RUN: FileCheck -check-prefix=CHECK2 %s < %t.log +# RUN: lld-link /out:%t.exe /entry:main %t.obj /verbose /includeglob:"glob_*" >& %t.log +# RUN: echo dummy >> %t.log +# RUN: FileCheck -check-prefix=CHECK3 %s < %t.log + # CHECK1: Discarded unused +# CHECK1: Discarded glob_match1 +# CHECK1: Discarded glob_match2 # CHECK1-NOT: Discarded used # CHECK2-NOT: Discarded unused # CHECK2-NOT: Discarded used +# CHECK3-NOT: Discarded glob_match1 +# CHECK3-NOT: Discarded glob_match2 --- !COFF header: @@ -31,6 +39,14 @@ sections: Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 4 SectionData: B82A000000C3 + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: B82A000000C3 + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: B82A000000C3 - Name: .drectve Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ] Alignment: 1 @@ -75,6 +91,32 @@ symbols: CheckSum: 0 Number: 0 Selection: IMAGE_COMDAT_SELECT_ANY + - Name: '.text$mn' + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + 
StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 6 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_ANY + - Name: '.text$mn' + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 6 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_ANY - Name: main Value: 0 SectionNumber: 1 @@ -93,4 +135,16 @@ symbols: SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: glob_match1 + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: glob_match2 + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL ... diff --git a/lld/test/COFF/include2.test b/lld/test/COFF/include2.test index 557de47d9e198..4796512fff93f 100644 --- a/lld/test/COFF/include2.test +++ b/lld/test/COFF/include2.test @@ -1,14 +1,20 @@ # RUN: yaml2obj %p/Inputs/include1a.yaml -o %t1.obj # RUN: yaml2obj %p/Inputs/include1b.yaml -o %t2.obj # RUN: yaml2obj %p/Inputs/include1c.yaml -o %t3.obj -# RUN: rm -f %t2.lib %t3.lib +# RUN: yaml2obj %p/Inputs/include1d.yaml -o %t4.obj +# RUN: rm -f %t2.lib %t3.lib %t4.lib # RUN: llvm-ar cru %t2.lib %t2.obj # RUN: llvm-ar cru %t3.lib %t3.obj -# RUN: lld-link /out:%t.exe /entry:main %t1.obj %t2.lib %t3.lib /verbose >& %t.log +# RUN: llvm-ar cru %t4.lib %t4.obj +# RUN: lld-link /out:%t.exe /entry:main %t1.obj %t2.lib %t3.lib %t4.lib /verbose >& %t.log # RUN: FileCheck %s < %t.log +# RUN: lld-link /out:%t.exe /entry:main %t1.obj %t2.lib %t3.lib %t4.lib /includeglob:baz /verbose >& %t.glob.log +# RUN: FileCheck -check-prefix=GLOB %s < %t.glob.log CHECK: include2.test.tmp1.obj CHECK: include2.test.tmp2.lib CHECK: include2.test.tmp2.lib(include2.test.tmp2.obj) for foo CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp3.lib(include2.test.tmp3.obj) for bar +CHECK-NOT: include2.test.tmp4.lib(include2.test.tmp4.obj) for baz +GLOB: include2.test.tmp4.lib(include2.test.tmp4.obj) for baz diff --git a/lld/test/ELF/aarch64-undefined-weak.s b/lld/test/ELF/aarch64-undefined-weak.s index f4628453ec3fe..015f9c9a043e5 100644 --- a/lld/test/ELF/aarch64-undefined-weak.s +++ b/lld/test/ELF/aarch64-undefined-weak.s @@ -1,7 +1,7 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: ld.lld --image-base=0x10000000 %t.o -o %t -// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +// RUN: llvm-objdump -d -z --no-show-raw-insn %t | FileCheck %s // Check that the ARM 64-bit ABI rules for undefined weak symbols are applied. // Branch instructions are resolved to the next instruction. 
Undefined diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 0dab66b613c77..2831d155fef12 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -249,6 +249,9 @@ REQUIRE-DEFINED: -include:_foo -include:_bar -include:_baz -include:_foo2 RUN: ld.lld -### foo.o -m i386pe -u _foo --undefined _bar -undefined=_baz --undefined=_foo2 -u_foo3 2>&1 | FileCheck -check-prefix=UNDEFINED %s UNDEFINED: -includeoptional:_foo -includeoptional:_bar -includeoptional:_baz -includeoptional:_foo2 -includeoptional:_foo3 +RUN: ld.lld -### foo.o -m i386pe --undefined-glob="_foo*" 2>&1 | FileCheck -check-prefix=UNDEFINED-GLOB %s +UNDEFINED-GLOB: -includeglob:_foo* + RUN: ld.lld -### -m i386pep foo.o -Llibpath 2>&1 | FileCheck -check-prefix LIBPATH %s LIBPATH: -libpath:libpath diff --git a/lld/test/wasm/unsupported-pic-relocations.s b/lld/test/wasm/unsupported-pic-relocations.s index ea32e8468cdb4..2f85afa02c88b 100644 --- a/lld/test/wasm/unsupported-pic-relocations.s +++ b/lld/test/wasm/unsupported-pic-relocations.s @@ -15,6 +15,10 @@ # RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \ # RUN: FileCheck %s +## These errors should not be reported under -r/--relocatable (i.e. when +## generating an object file) +# RUN: wasm-ld --experimental-pic -r %t.o -o /dev/null + .functype external_func () -> () use_undefined_function: @@ -23,7 +27,7 @@ use_undefined_function: # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB is not supported against an undefined symbol `external_func` drop end_function - + use_undefined_data: .functype use_undefined_data () -> () i32.const external_data@MBREL diff --git a/lld/test/wasm/unsupported-pic-relocations64.s b/lld/test/wasm/unsupported-pic-relocations64.s index db9707b7fbac5..df885b8d75fbe 100644 --- a/lld/test/wasm/unsupported-pic-relocations64.s +++ b/lld/test/wasm/unsupported-pic-relocations64.s @@ -15,6 +15,10 @@ # RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \ # RUN: FileCheck %s +## These errors should not be reported under -r/--relocatable (i.e.
when +## generating an object file) +# RUN: wasm-ld -mwasm64 --experimental-pic -r %t.o -o /dev/null + .functype external_func () -> () use_undefined_function: @@ -23,7 +27,7 @@ use_undefined_function: # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB64 is not supported against an undefined symbol `external_func` drop end_function - + use_undefined_data: .functype use_undefined_data () -> () i64.const external_data@MBREL diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 2dbfe33549471..45ad32701616a 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -173,7 +173,7 @@ void scanRelocations(InputChunk *chunk) { } } - if (sym->isUndefined()) { + if (!config->relocatable && sym->isUndefined()) { switch (reloc.Type) { case R_WASM_TABLE_INDEX_REL_SLEB: case R_WASM_TABLE_INDEX_REL_SLEB64: @@ -187,11 +187,11 @@ void scanRelocations(InputChunk *chunk) { toString(*sym) + "`"); break; } - } - if (sym->isUndefined() && !config->relocatable && !sym->isWeak()) { - // Report undefined symbols - reportUndefined(file, sym); + if (!sym->isWeak()) { + // Report undefined symbols + reportUndefined(file, sym); + } } } } diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 961fb2d1a7617..b72a462d04643 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -667,6 +667,79 @@ lldb_private::python::SWIGBridge::LLDBSwigPythonGetRepeatCommandForScriptedComma return result.Str().GetString().str(); } +StructuredData::DictionarySP +lldb_private::python::SWIGBridge::LLDBSwigPythonHandleArgumentCompletionForScriptedCommand(PyObject *implementor, + std::vector<llvm::StringRef> &args_vec, size_t args_pos, size_t pos_in_arg) { + + PyErr_Cleaner py_err_cleaner(true); + + PythonObject self(PyRefType::Borrowed, implementor); + auto pfunc = self.ResolveName<PythonCallable>("handle_argument_completion"); + // If this isn't implemented, return an empty dict to signal falling back to default completion: + if (!pfunc.IsAllocated()) + return {}; + + PythonList args_list(PyInitialValue::Empty); + for (auto elem : args_vec) + args_list.AppendItem(PythonString(elem)); + + PythonObject result = pfunc(args_list, PythonInteger(args_pos), PythonInteger(pos_in_arg)); + // Returning None means do the ordinary completion + if (result.IsNone()) + return {}; + + // Convert the return dictionary to a DictionarySP.
+ StructuredData::ObjectSP result_obj_sp = result.CreateStructuredObject(); + if (!result_obj_sp) + return {}; + + StructuredData::DictionarySP dict_sp(new StructuredData::Dictionary(result_obj_sp)); + if (dict_sp->GetType() == lldb::eStructuredDataTypeInvalid) + return {}; + return dict_sp; +} + +StructuredData::DictionarySP +lldb_private::python::SWIGBridge::LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand(PyObject *implementor, + llvm::StringRef &long_option, size_t pos_in_arg) { + + PyErr_Cleaner py_err_cleaner(true); + + PythonObject self(PyRefType::Borrowed, implementor); + auto pfunc = self.ResolveName<PythonCallable>("handle_option_argument_completion"); + // If this isn't implemented, return an empty dict to signal falling back to default completion: + if (!pfunc.IsAllocated()) + return {}; + + PythonObject result = pfunc(PythonString(long_option), PythonInteger(pos_in_arg)); + // Returning None means do the ordinary completion + if (result.IsNone()) + return {}; + + // Returning a boolean: + // True means the completion was handled, but there were no completions + // False means that the completion was not handled, again, do the ordinary completion: + if (result.GetObjectType() == PyObjectType::Boolean) { + if (!result.IsTrue()) + return {}; + // Make up a completion dictionary with the right element: + StructuredData::DictionarySP dict_sp(new StructuredData::Dictionary()); + dict_sp->AddBooleanItem("no-completion", true); + return dict_sp; + } + + + // Convert the return dictionary to a DictionarySP. + StructuredData::ObjectSP result_obj_sp = result.CreateStructuredObject(); + if (!result_obj_sp) + return {}; + + StructuredData::DictionarySP dict_sp(new StructuredData::Dictionary(result_obj_sp)); + if (dict_sp->GetType() == lldb::eStructuredDataTypeInvalid) + return {}; + return dict_sp; +} + #include "lldb/Interpreter/CommandReturnObject.h" bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallParsedCommandObject( diff --git a/lldb/docs/use/python-reference.rst b/lldb/docs/use/python-reference.rst index b12048f1af067..95a6020ca3e45 100644 --- a/lldb/docs/use/python-reference.rst +++ b/lldb/docs/use/python-reference.rst @@ -551,7 +551,7 @@ command definition form can't do the right thing. Since lldb 3.7, Python commands can also be implemented by means of a class which should implement the following interface: -:: +.. code-block:: python class CommandObjectType: def __init__(self, debugger, internal_dict): @@ -586,20 +586,193 @@ which should implement the following interface: As a convenience, you can treat the result object as a Python file object, and say -:: +.. code-block:: python print >>result, "my command does lots of cool stuff" SBCommandReturnObject and SBStream both support this file-like behavior by providing write() and flush() calls at the Python layer. +The commands that are added using this class definition are what lldb calls +"raw" commands. The command interpreter doesn't attempt to parse the command: +it doesn't handle option values, generate help for them, or complete them. +Raw commands are useful when the arguments passed to the command +are unstructured, and having to protect them against lldb command parsing would +be onerous. For instance, "expr" is a raw command. + +You can also add scripted commands that implement the "parsed command", where +the options and their types are specified, as well as the arguments and argument +types.
These commands look and act like the majority of lldb commands, and you +can also add custom completions for the options and/or the arguments if you have +special needs. + +The easiest way to do this is to derive your new command from the lldb.ParsedCommand +class. It responds in the same way to the help and repeat command interfaces, +provides some convenience methods, and most importantly an LLDBOptionValueParser, +accessed through lldb.ParsedCommand.get_parser(). The parser is used to set +your command definitions, and to retrieve option values in the __call__ method. + +To set up the command definition, implement the ParsedCommand abstract method: + +.. code-block:: python + + def setup_command_definition(self): + +This is called when your command is added to lldb. In this method you add the +options and their types, the option help strings, etc. to the command using the API: + +.. code-block:: python + + def add_option(self, short_option, long_option, help, default, + dest = None, required=False, groups = None, + value_type=lldb.eArgTypeNone, completion_type=None, + enum_values=None): + """ + short_option: one character, must be unique, not required + long_option: no spaces, must be unique, required + help: a usage string for this option, will print in the command help + default: the initial value for this option (if it has a value) + dest: the name of the property that gives you access to the value of + this option. Defaults to the long option if not provided. + required: if true, this option must be provided or the command will error out + groups: Which "option groups" does this option belong to. This can either be + a simple list (e.g. [1, 3, 4, 5]) or you can specify ranges by sublists: + so [1, [3,5]] is the same as [1, 3, 4, 5]. + value_type: one of the lldb.eArgType enum values. Some of the common arg + types also have default completers, which will be applied automatically. + completion_type: currently these are values from the lldb.CompletionType enum. If + you need custom completions, implement handle_option_argument_completion. + enum_values: An array of duples: ["element_name", "element_help"]. If provided, + only one of the enum elements is allowed. The value will be the + element_name for the chosen enum element as a string. + """ + +Similarly, you can add argument types to the command: + +.. code-block:: python + + def make_argument_element(self, arg_type, repeat = "optional", groups = None): + """ + arg_type: The argument type, one of the lldb.eArgType enum values. + repeat: Choose from the following options: + "plain" - one value + "optional" - zero or more values + "plus" - one or more values + groups: As with add_option. + """ + +Then implement the body of the command by defining: + +.. code-block:: python + + def __call__(self, debugger, args_array, exe_ctx, result): + """This is the command callback. The option values are + provided by the 'dest' properties on the parser. + + args_array: This is the list of arguments provided. + exe_ctx: Gives the SBExecutionContext on which the + command should operate. + result: Any results of the command should be + written into this SBCommandReturnObject. + """ + +This differs from the "raw" command's __call__ in that the arguments are already +parsed into the args_array, and the option values are set in the parser, and +can be accessed using their property name. The LLDBOptionValueParser class has +a couple of other handy methods: + ..
code-block:: python + def was_set(self, long_option_name): + +returns True if the option was specified on the command line. + +.. code-block:: python + + def dest_for_option(self, long_option_name): + """ + This will return the value of the dest variable you defined for opt_name. + Mostly useful for handle_completion where you get passed the long option. + """ + +lldb will handle completing your option names, and all your enum values +automatically. If your option or argument types have associated built-in completers, +then lldb will also handle that completion for you. But if you have a need for +custom completions, either in your arguments or option values, you can handle +completion by hand as well. To handle completion of option value arguments, +your lldb.ParsedCommand subclass should implement: + +.. code-block:: python + + def handle_option_argument_completion(self, long_option, cursor_pos): + """ + long_option: The long option name of the option whose value you are + asked to complete. + cursor_pos: The cursor position in the value for that option - which + you can get from the option parser. + """ + +And to handle the completion of arguments: + +.. code-block:: python + + def handle_argument_completion(self, args, arg_pos, cursor_pos): + """ + args: A list of the arguments to the command + arg_pos: An index into the args list of the argument with the cursor + cursor_pos: The cursor position in the arg specified by arg_pos + """ + +When either of these APIs is called, the command line will have been parsed up to +the word containing the cursor, and any option values set in that part of the command +string are available from the option value parser. That's useful for instance +if you have a --shared-library option that would constrain the completions for, +say, a symbol name option or argument. + +The return value specifies what the completion options are. You have four +choices: + +- `True`: the completion was handled with no completions. + +- `False`: the completion was not handled, forward it to the regular +completion machinery. + +- A dictionary with the key: "completion": there is one candidate, +whose value is the value of the "completion" key. Optionally you can pass a +"mode" key whose value is either "partial" or "complete". Return "partial" if +the "completion" string is a common prefix of all the completions. + +For instance, if the string you are completing is "Test" and the available completions are: +"Test1", "Test11" and "Test111", you should return the dictionary: + +.. code-block:: python + + return {"completion": "Test1", "mode" : "partial"} + +and then lldb will add the "1" at the cursor and advance it after the added string, +waiting for more completions. But if "Test1" is the only completion, return: + +.. code-block:: python + + {"completion": "Test1", "mode": "complete"} + +and lldb will add "1 " at the cursor, indicating the command string is complete. + +The default is "complete"; you don't need to specify a "mode" in that case. + +- A dictionary with the key: "values" whose value is a list of candidate completion +strings. The command interpreter will present those strings as the available choices. +You can optionally include a "descriptions" key, whose value is a parallel array +of description strings, and the completion will show the description next to +each completion.
+ + One other handy convenience when defining lldb command-line commands is the -command command script import which will import a module specified by file +command "command script import" which will import a module specified by file path, so you don't have to change your PYTHONPATH for temporary scripts. It also has another convenience that if your new script module has a function of the form: -:: +.. code-block:: python def __lldb_init_module(debugger, internal_dict): # Command Initialization code goes here @@ -615,7 +788,7 @@ creating scripts that can be run from the command line. However, for command line scripts, the debugger instance must be created manually. Sample code would look like: -:: +.. code-block:: python if __name__ == '__main__': # Initialize the debugger before making any API calls. @@ -638,7 +811,7 @@ look like: Now we can create a module called ls.py in the file ~/ls.py that will implement a function that can be used by LLDB's python command code: -:: +.. code-block:: python #!/usr/bin/env python diff --git a/lldb/examples/python/cmdtemplate.py b/lldb/examples/python/cmdtemplate.py index b6a21cba7113e..a9fbe0b40e195 100644 --- a/lldb/examples/python/cmdtemplate.py +++ b/lldb/examples/python/cmdtemplate.py @@ -29,8 +29,8 @@ def get_flags(self): return lldb.eCommandRequiresFrame | lldb.eCommandProcessMustBePaused def setup_command_definition(self): - - self.ov_parser.add_option( + ov_parser = self.get_parser() + ov_parser.add_option( "i", "in-scope", help = "in_scope_only = True", @@ -39,7 +39,7 @@ def setup_command_definition(self): default = True, ) - self.ov_parser.add_option( + ov_parser.add_option( "i", "in-scope", help = "in_scope_only = True", @@ -48,7 +48,7 @@ def setup_command_definition(self): default=True, ) - self.ov_parser.add_option( + ov_parser.add_option( "a", "arguments", help = "arguments = True", @@ -57,7 +57,7 @@ def setup_command_definition(self): default = True, ) - self.ov_parser.add_option( + ov_parser.add_option( "l", "locals", help = "locals = True", @@ -66,7 +66,7 @@ def setup_command_definition(self): default = True, ) - self.ov_parser.add_option( + ov_parser.add_option( "s", "statics", help = "statics = True", @@ -103,8 +103,9 @@ def __call__(self, debugger, command, exe_ctx, result): result.SetError("invalid frame") return + ov_parser = self.get_parser() variables_list = frame.GetVariables( - self.ov_parser.arguments, self.ov_parser.locals, self.ov_parser.statics, self.ov_parser.inscope + ov_parser.arguments, ov_parser.locals, ov_parser.statics, ov_parser.inscope ) variables_count = variables_list.GetSize() if variables_count == 0: diff --git a/lldb/examples/python/templates/parsed_cmd.py b/lldb/examples/python/templates/parsed_cmd.py index 06124adf43420..13d6eae405c08 100644 --- a/lldb/examples/python/templates/parsed_cmd.py +++ b/lldb/examples/python/templates/parsed_cmd.py @@ -4,7 +4,8 @@ The way to use it is to make a class for your command that inherits from ParsedCommandBase. That will make an LLDBOptionValueParser which you will use for your option definition, and to fetch option values for the current invocation -of your command. Access to the OV parser is through: +of your command. For concision, I'll call this the `OVParser`. +Access to the `OVParser` is through: ParsedCommandBase.get_parser() @@ -43,7 +44,65 @@ def __call__(self, debugger, args_list, exe_ctx, result): will return True if the user set this option, and False if it was left at its default value.
-There are example commands in the lldb testsuite at: +Custom Completions: + +You can also implement custom completers for your custom command, either for the +arguments to your command or to the option values in your command. If you use enum +values or if your option/argument is one of the types we have completers for, +you should not need to do this. But if you have your own completable types, or if +you want completion of one option to be conditioned by other options on the command +line, you can use this interface to take over the completion. + +You can choose to add a completion for the option values defined for your command, +or for the arguments, separately. For the option values, define: + +def handle_option_argument_completion(self, long_option, cursor_pos): + +The line to be completed will be parsed up to the option containing the cursor position, +and the values will be set in the OptionValue parser object. long_option will be +the option name containing the cursor, and cursor_pos will be the position of the cursor +in that option's value. You can call the `OVParser` method: `dest_for_option(long_option)` +to get the value for that option. The other options that came before the cursor in the command +line will also be set in the `OVParser` when the completion handler is called. + +For argument values, define: + +def handle_argument_completion(self, args, arg_pos, cursor_pos): + +Again, the command line will be parsed up to the cursor position, and all the options +before the cursor position will be set in the `OVParser`. args is a python list of the +arguments, arg_pos is the index of the argument with the cursor, and cursor_pos is +the position of the cursor in the argument. + +In both cases, the return value determines the completion. + +Return False to mean "Not Handled" - in which case lldb will fall back on the +standard completion machinery. + +Return True to mean "Handled with no completions". + +If there is a single unique completion, return a Python dictionary with two elements: + +return {"completion" : "completed_value", "mode" : <"partial", "complete">} + +If the mode is "partial", then the completion is to a common base; if it is "complete" +then the argument is considered done - mostly meaning lldb will put a space after the +completion string. "complete" is the default if no "mode" is specified. + +If there are multiple completion options, then return: + +return {"values" : ["option1", "option2"]} + +Optionally, you can return a parallel array of "descriptions" which the completer will +print alongside the options: + +return {"values" : ["option1", "option2"], "descriptions" : ["the first option", "the second option"]} + +The cmdtemplate example currently uses the parsed command infrastructure: + +llvm-project/lldb/examples/python/cmdtemplate.py + +There are also a few example commands in the lldb testsuite at: llvm-project/lldb/test/API/commands/command/script/add/test_commands.py """ @@ -226,10 +285,14 @@ def set_option_value(self, exe_ctx, opt_name, opt_value): return True def was_set(self, opt_name): - """ Call this in the __call__ method of your command to determine - whether this option was set on the command line. It is sometimes - useful to know whether an option has the default value because the - user set it explicitly (was_set -> True) or not. """ + """Call this in the __call__ method of your command to determine + whether this option was set on the command line.
It is sometimes + useful to know whether an option has the default value because the + user set it explicitly (was_set -> True) or not. + You can also call this in a handle_completion method, but it will + currently only report true values for the options mentioned + BEFORE the cursor point in the command line. + """ elem = self.get_option_element(opt_name) if not elem: @@ -239,6 +302,16 @@ def was_set(self, opt_name): except AttributeError: return False + def dest_for_option(self, opt_name): + """This will return the value of the dest variable you defined for opt_name. + Mostly useful for handle_completion where you get passed the long option. + """ + elem = self.get_option_element(opt_name) + if not elem: + return None + value = self.__dict__[elem["dest"]] + return value + def add_option(self, short_option, long_option, help, default, dest = None, required=False, groups = None, value_type=lldb.eArgTypeNone, completion_type=None, @@ -251,14 +324,16 @@ def add_option(self, short_option, long_option, help, default, dest: the name of the property that gives you access to the value for this value. Defaults to the long option if not provided. required: if true, this option must be provided or the command will error out - groups: Which "option groups" does this option belong to + groups: Which "option groups" does this option belong to. This can either be + a simple list (e.g. [1, 3, 4, 5]) or you can specify ranges by sublists: + so [1, [3,5]] is the same as [1, 3, 4, 5]. value_type: one of the lldb.eArgType enum values. Some of the common arg types also have default completers, which will be applied automatically. - completion_type: currently these are values form the lldb.CompletionType enum, I - haven't done custom completions yet. + completion_type: currently these are values from the lldb.CompletionType enum. If + you need custom completions, implement handle_option_argument_completion. enum_values: An array of duples: ["element_name", "element_help"]. If provided, - only one of the enum elements is allowed. The value will be the - element_name for the chosen enum element as a string. + only one of the enum elements is allowed. The value will be the + element_name for the chosen enum element as a string.
""" if not dest: dest = long_option diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 901ecf3012d51..2c2bd6f232e09 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -420,6 +420,20 @@ class ScriptInterpreter : public PluginInterface { return std::nullopt; } + virtual StructuredData::DictionarySP + HandleArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, std::vector &args, + size_t args_pos, size_t char_in_arg) { + return {}; + } + + virtual StructuredData::DictionarySP + HandleOptionArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, llvm::StringRef &long_name, + size_t char_in_arg) { + return {}; + } + virtual bool RunScriptFormatKeyword(const char *impl_function, Process *process, std::string &output, Status &error) { diff --git a/lldb/include/lldb/Symbol/UnwindPlan.h b/lldb/include/lldb/Symbol/UnwindPlan.h index a1d00f2d2c0cd..e1567c7357d0b 100644 --- a/lldb/include/lldb/Symbol/UnwindPlan.h +++ b/lldb/include/lldb/Symbol/UnwindPlan.h @@ -370,6 +370,13 @@ class UnwindPlan { bool SetRegisterLocationToSame(uint32_t reg_num, bool must_replace); + /// This method does not make a copy of the \a opcodes memory, it is + /// assumed to have the same lifetime as the Module this UnwindPlan will + /// be registered in. + bool SetRegisterLocationToIsDWARFExpression(uint32_t reg_num, + const uint8_t *opcodes, + uint32_t len, bool can_replace); + bool SetRegisterLocationToIsConstant(uint32_t reg_num, uint64_t constant, bool can_replace); diff --git a/lldb/include/lldb/Utility/CompletionRequest.h b/lldb/include/lldb/Utility/CompletionRequest.h index 1a2b1d639950f..650158a197dbd 100644 --- a/lldb/include/lldb/Utility/CompletionRequest.h +++ b/lldb/include/lldb/Utility/CompletionRequest.h @@ -139,6 +139,8 @@ class CompletionRequest { return GetParsedLine()[GetCursorIndex()]; } + size_t GetCursorCharPos() const { return m_cursor_char_position; } + /// Drops the first argument from the argument list. void ShiftArguments() { m_cursor_index--; diff --git a/lldb/packages/Python/lldbsuite/test/builders/builder.py b/lldb/packages/Python/lldbsuite/test/builders/builder.py index 564918c58b6dd..e3099219e437e 100644 --- a/lldb/packages/Python/lldbsuite/test/builders/builder.py +++ b/lldb/packages/Python/lldbsuite/test/builders/builder.py @@ -110,6 +110,10 @@ def getToolchainSpec(self, compiler): if not cc: return [] + exe_ext = "" + if lldbplatformutil.getHostPlatform() == "windows": + exe_ext = ".exe" + cc = cc.strip() cc_path = pathlib.Path(cc) @@ -149,9 +153,9 @@ def getToolchainSpec(self, compiler): cc_dir = cc_path.parent def getToolchainUtil(util_name): - return cc_dir / (cc_prefix + util_name + cc_ext) + return os.path.join(configuration.llvm_tools_dir, util_name + exe_ext) - cxx = getToolchainUtil(cxx_type) + cxx = cc_dir / (cc_prefix + cxx_type + cc_ext) util_names = { "OBJCOPY": "objcopy", @@ -161,6 +165,10 @@ def getToolchainUtil(util_name): } utils = [] + # Required by API TestBSDArchives.py tests. 
+ if not os.getenv("LLVM_AR"): + utils.extend(["LLVM_AR=%s" % getToolchainUtil("llvm-ar")]) + if not lldbplatformutil.platformIsDarwin(): if cc_type in ["clang", "cc", "gcc"]: util_paths = {} diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index 27eef040497d1..1bacd74a968c3 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -118,6 +118,9 @@ # same base name. all_tests = set() +# Path to LLVM tools to be used by tests. +llvm_tools_dir = None + # LLDB library directory. lldb_libs_dir = None lldb_obj_root = None diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index f14a00a2394b0..b1ae896d3fd3b 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -280,6 +280,7 @@ def parseOptionsAndInitTestdirs(): "xcrun -find -toolchain default dsymutil" ) if args.llvm_tools_dir: + configuration.llvm_tools_dir = args.llvm_tools_dir configuration.filecheck = shutil.which("FileCheck", path=args.llvm_tools_dir) configuration.yaml2obj = shutil.which("yaml2obj", path=args.llvm_tools_dir) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index df5a110cb5b30..c6b7ce84109c0 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1370,6 +1370,9 @@ def isAArch64PAuth(self): return True return self.isAArch64() and "paca" in self.getCPUInfo() + def isAArch64FPMR(self): + return self.isAArch64() and "fpmr" in self.getCPUInfo() + def isAArch64Windows(self): """Returns true if the architecture is AArch64 and platform windows.""" if self.getPlatform() == "windows": diff --git a/lldb/packages/Python/lldbsuite/test/make/Android.rules b/lldb/packages/Python/lldbsuite/test/make/Android.rules index cd7d8ae74d6bf..44aedf7248419 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Android.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Android.rules @@ -1,81 +1,59 @@ NDK_ROOT := $(shell dirname $(CC))/../../../../.. -ifeq "$(findstring 64, $(ARCH))" "64" - # lowest 64-bit API level - API_LEVEL := 21 -else ifeq "$(ARCH)" "i386" - # clone(2) declaration is present only since this api level - API_LEVEL := 17 +ifeq "$(HOST_OS)" "Linux" + HOST_TAG := linux-x86_64 +else ifeq "$(HOST_OS)" "Darwin" + HOST_TAG := darwin-x86_64 else - # lowest supported 32-bit API level - API_LEVEL := 16 + HOST_TAG := windows-x86_64 +endif + +TOOLCHAIN_ROOT := $(NDK_ROOT)/toolchains/llvm/prebuilt/$(HOST_TAG) +TOOLCHAIN_SYSROOT := $(TOOLCHAIN_ROOT)/sysroot + +OBJCOPY ?= $(TOOLCHAIN_ROOT)/bin/llvm-objcopy +ARCHIVER ?= $(TOOLCHAIN_ROOT)/bin/llvm-ar + +ifeq "$(wildcard $(TOOLCHAIN_SYSROOT)/.)" "" +# Compiling test inferiors for Android requires an NDK with the unified +# toolchain introduced in version r19. +$(error "No unified toolchain sysroot found in $(NDK_ROOT). 
NDK must be r19 or later.") endif ifeq "$(ARCH)" "arm" - SYSROOT_ARCH := arm - STL_ARCH := armeabi-v7a TRIPLE := armv7-none-linux-androideabi ARCH_CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16 -marm else ifeq "$(ARCH)" "aarch64" - SYSROOT_ARCH := arm64 - STL_ARCH := arm64-v8a TRIPLE := aarch64-none-linux-android else ifeq "$(ARCH)" "i386" - SYSROOT_ARCH := x86 - STL_ARCH := x86 TRIPLE := i686-none-linux-android else - SYSROOT_ARCH := $(ARCH) - STL_ARCH := $(ARCH) TRIPLE := $(ARCH)-none-linux-android endif -ifeq "$(findstring 86,$(ARCH))" "86" - TOOLCHAIN_DIR := $(STL_ARCH)-4.9 -else ifeq "$(ARCH)" "arm" - TOOLCHAIN_DIR := arm-linux-androideabi-4.9 -else - TOOLCHAIN_DIR := $(subst -none,,$(TRIPLE))-4.9 -endif +# lowest 64-bit API level +API_LEVEL := 21 ifeq "$(ARCH)" "arm" - TOOL_PREFIX := arm-linux-androideabi + ARCH_DIR := arm-linux-androideabi else - TOOL_PREFIX := $(subst -none,,$(TRIPLE)) + ARCH_DIR := $(subst -none,,$(TRIPLE)) endif -ifeq "$(HOST_OS)" "Linux" - HOST_TAG := linux-x86_64 -else ifeq "$(HOST_OS)" "Darwin" - HOST_TAG := darwin-x86_64 -else - HOST_TAG := windows-x86_64 -endif - -GCC_TOOLCHAIN = $(NDK_ROOT)/toolchains/$(TOOLCHAIN_DIR)/prebuilt/$(HOST_TAG) - -OBJCOPY ?= $(GCC_TOOLCHAIN)/bin/$(TOOL_PREFIX)-objcopy -ARCHIVER ?= $(GCC_TOOLCHAIN)/bin/$(TOOL_PREFIX)-ar - -ifeq "$(findstring clang,$(CC))" "clang" - ARCH_CFLAGS += -target $(TRIPLE) --gcc-toolchain=$(GCC_TOOLCHAIN) - ARCH_LDFLAGS += -target $(TRIPLE) --gcc-toolchain=$(GCC_TOOLCHAIN) -endif - -ARCH_CFLAGS += --sysroot=$(NDK_ROOT)/sysroot \ - -isystem $(NDK_ROOT)/sysroot/usr/include/$(TOOL_PREFIX) \ - -D__ANDROID_API__=$(API_LEVEL) \ - -isystem $(NDK_ROOT)/platforms/android-$(API_LEVEL)/arch-$(SYSROOT_ARCH)/usr/include - -ARCH_LDFLAGS += --sysroot=$(NDK_ROOT)/platforms/android-$(API_LEVEL)/arch-$(SYSROOT_ARCH) -lm +ARCH_CFLAGS += \ + --target=$(TRIPLE) \ + --sysroot=$(TOOLCHAIN_SYSROOT) \ + -D__ANDROID_API__=$(API_LEVEL) ARCH_CXXFLAGS += \ - -isystem $(NDK_ROOT)/sources/cxx-stl/llvm-libc++/include \ - -isystem $(NDK_ROOT)/sources/android/support/include \ - -isystem $(NDK_ROOT)/sources/cxx-stl/llvm-libc++abi/include + -isystem $(TOOLCHAIN_SYSROOT)/usr/include/c++/v1 ARCH_LDFLAGS += \ - -L$(NDK_ROOT)/sources/cxx-stl/llvm-libc++/libs/$(STL_ARCH) \ - $(NDK_ROOT)/sources/cxx-stl/llvm-libc++/libs/$(STL_ARCH)/libc++_static.a \ + --target=$(TRIPLE) \ + --sysroot=$(TOOLCHAIN_SYSROOT) \ + --prefix=$(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/$(API_LEVEL) \ + -L$(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/$(API_LEVEL) \ + $(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/libc++_static.a \ + -lm \ -lc++abi \ -nostdlib++ diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 1c1f7e2a03def..d5017ad6bff16 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -662,15 +662,14 @@ size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size, lldb::SBError &error) { LLDB_INSTRUMENT_VA(this, addr, buf, size, error); - SBError sb_error; size_t bytes_read = 0; TargetSP target_sp(GetSP()); if (target_sp) { std::lock_guard guard(target_sp->GetAPIMutex()); bytes_read = - target_sp->ReadMemory(addr.ref(), buf, size, sb_error.ref(), true); + target_sp->ReadMemory(addr.ref(), buf, size, error.ref(), true); } else { - sb_error.SetErrorString("invalid target"); + error.SetErrorString("invalid target"); } return bytes_read; diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index e3291640fa935..845b89a75b7b3 100644 --- 
a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -1637,6 +1637,129 @@ class CommandObjectScriptingObjectParsed : public CommandObjectParsed { size_t GetNumOptions() { return m_num_options; } + void PrepareOptionsForCompletion(CompletionRequest &request, + OptionElementVector &option_vec, + ExecutionContext *exe_ctx) { + // I'm not sure if we'll get into trouble doing an option parsing start + // and end in this context. If so, then I'll have to directly tell the + // scripter to do this. + OptionParsingStarting(exe_ctx); + auto opt_defs = GetDefinitions(); + + // Iterate through the options we found so far, and push them into + // the scripted side. + for (auto option_elem : option_vec) { + int cur_defs_index = option_elem.opt_defs_index; + // If we don't recognize this option we can't set it. + if (cur_defs_index == OptionArgElement::eUnrecognizedArg || + cur_defs_index == OptionArgElement::eBareDash || + cur_defs_index == OptionArgElement::eBareDoubleDash) + continue; + bool option_has_arg = opt_defs[cur_defs_index].option_has_arg; + llvm::StringRef cur_arg_value; + if (option_has_arg) { + int cur_arg_pos = option_elem.opt_arg_pos; + if (cur_arg_pos != OptionArgElement::eUnrecognizedArg && + cur_arg_pos != OptionArgElement::eBareDash && + cur_arg_pos != OptionArgElement::eBareDoubleDash) { + cur_arg_value = + request.GetParsedLine().GetArgumentAtIndex(cur_arg_pos); + } + } + SetOptionValue(cur_defs_index, cur_arg_value, exe_ctx); + } + OptionParsingFinished(exe_ctx); + } + + void + ProcessCompletionDict(CompletionRequest &request, + StructuredData::DictionarySP &completion_dict_sp) { + // We don't know how to process an empty completion dict, our callers have + // to do that. + assert(completion_dict_sp && "Must have valid completion dict"); + // First handle the case of a single completion: + llvm::StringRef completion; + // If the dictionary has one element "no-completion" then we return here + if (completion_dict_sp->GetValueForKeyAsString("no-completion", + completion)) + return; + + if (completion_dict_sp->GetValueForKeyAsString("completion", + completion)) { + llvm::StringRef mode_str; + CompletionMode mode = CompletionMode::Normal; + if (completion_dict_sp->GetValueForKeyAsString("mode", mode_str)) { + if (mode_str == "complete") + mode = CompletionMode::Normal; + else if (mode_str == "partial") + mode = CompletionMode::Partial; + else { + // FIXME - how do I report errors here? + return; + } + } + request.AddCompletion(completion, "", mode); + return; + } + // The completions are required, the descriptions are not: + StructuredData::Array *completions; + StructuredData::Array *descriptions; + if (completion_dict_sp->GetValueForKeyAsArray("values", completions)) { + completion_dict_sp->GetValueForKeyAsArray("descriptions", descriptions); + size_t num_completions = completions->GetSize(); + for (size_t idx = 0; idx < num_completions; idx++) { + auto val = completions->GetItemAtIndexAsString(idx); + if (!val) + // FIXME: How do I report this error? + return; + + if (descriptions) { + auto desc = descriptions->GetItemAtIndexAsString(idx); + request.AddCompletion(*val, desc ? 
*desc : ""); + } else + request.AddCompletion(*val); + } + } + } + + void + HandleOptionArgumentCompletion(lldb_private::CompletionRequest &request, + OptionElementVector &option_vec, + int opt_element_index, + CommandInterpreter &interpreter) override { + ScriptInterpreter *scripter = + interpreter.GetDebugger().GetScriptInterpreter(); + + if (!scripter) + return; + + ExecutionContext exe_ctx = interpreter.GetExecutionContext(); + PrepareOptionsForCompletion(request, option_vec, &exe_ctx); + + auto defs = GetDefinitions(); + + size_t defs_index = option_vec[opt_element_index].opt_defs_index; + llvm::StringRef option_name = defs[defs_index].long_option; + bool is_enum = defs[defs_index].enum_values.size() != 0; + if (option_name.empty()) + return; + // If this is an enum, we don't call the custom completer, just let the + // regular option completer handle that: + StructuredData::DictionarySP completion_dict_sp; + if (!is_enum) + completion_dict_sp = + scripter->HandleOptionArgumentCompletionForScriptedCommand( + m_cmd_obj_sp, option_name, request.GetCursorCharPos()); + + if (!completion_dict_sp) { + Options::HandleOptionArgumentCompletion(request, option_vec, + opt_element_index, interpreter); + return; + } + + ProcessCompletionDict(request, completion_dict_sp); + } + private: struct EnumValueStorage { EnumValueStorage() { @@ -1878,6 +2001,74 @@ class CommandObjectScriptingObjectParsed : public CommandObjectParsed { Status GetArgsError() { return m_args_error.Clone(); } bool WantsCompletion() override { return true; } +private: + void PrepareOptionsForCompletion(CompletionRequest &request, + OptionElementVector &option_vec) { + // First, we have to tell the Scripted side to set the values in its + // option store, then we call into the handle_completion passing in + // an array of the args, the arg index and the cursor position in the arg. + // We want the script side to have a chance to clear its state, so tell + // it argument parsing has started: + Options *options = GetOptions(); + // If there are not options, this will be nullptr, and in that case we + // can just skip setting the options on the scripted side: + if (options) + m_options.PrepareOptionsForCompletion(request, option_vec, &m_exe_ctx); + } + +public: + void HandleArgumentCompletion(CompletionRequest &request, + OptionElementVector &option_vec) override { + ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); + + if (!scripter) + return; + + // Set up the options values on the scripted side: + PrepareOptionsForCompletion(request, option_vec); + + // Now we have to make up the argument list. 
+ // The ParseForCompletion only identifies tokens in the m_parsed_line + // it doesn't remove the options leaving only the args as it does for + // the regular Parse, so we have to filter out the option ones using the + // option_element_vector: + + Options *options = GetOptions(); + auto defs = options->GetDefinitions(); + + std::unordered_set option_slots; + for (const auto &elem : option_vec) { + if (elem.opt_defs_index == -1) + continue; + option_slots.insert(elem.opt_pos); + if (defs[elem.opt_defs_index].option_has_arg) + option_slots.insert(elem.opt_arg_pos); + } + + std::vector args_vec; + Args &args = request.GetParsedLine(); + size_t num_args = args.GetArgumentCount(); + size_t cursor_idx = request.GetCursorIndex(); + size_t args_elem_pos = cursor_idx; + + for (size_t idx = 0; idx < num_args; idx++) { + if (option_slots.count(idx) == 0) + args_vec.push_back(args[idx].ref()); + else if (idx < cursor_idx) + args_elem_pos--; + } + StructuredData::DictionarySP completion_dict_sp = + scripter->HandleArgumentCompletionForScriptedCommand( + m_cmd_obj_sp, args_vec, args_elem_pos, request.GetCursorCharPos()); + + if (!completion_dict_sp) { + CommandObject::HandleArgumentCompletion(request, option_vec); + return; + } + + m_options.ProcessCompletionDict(request, completion_dict_sp); + } + bool IsRemovable() const override { return true; } ScriptedCommandSynchronicity GetSynchronicity() { return m_synchro; } diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 142f96946ed3d..e2203292e71e2 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -1223,7 +1223,7 @@ CommandObjectMultiwordFrame::CommandObjectMultiwordFrame( CommandInterpreter &interpreter) : CommandObjectMultiword(interpreter, "frame", "Commands for selecting and " - "examing the current " + "examining the current " "thread's stack frames.", "frame []") { LoadSubCommand("diagnose", diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 5b0f4f66f248b..e7c7d07ad4772 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -1420,7 +1420,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed { PlatformSP platform_sp = process->GetTarget().GetPlatform(); if (!platform_sp) { - result.AppendError("Couldn'retrieve the target's platform"); + result.AppendError("Couldn't retrieve the target's platform"); return; } diff --git a/lldb/source/Commands/CommandObjectScripting.cpp b/lldb/source/Commands/CommandObjectScripting.cpp index 9a1a2b63c7af0..1f8ee0a9554ec 100644 --- a/lldb/source/Commands/CommandObjectScripting.cpp +++ b/lldb/source/Commands/CommandObjectScripting.cpp @@ -254,7 +254,7 @@ CommandObjectMultiwordScripting::CommandObjectMultiwordScripting( CommandInterpreter &interpreter) : CommandObjectMultiword( interpreter, "scripting", - "Commands for operating on the scripting functionnalities.", + "Commands for operating on the scripting functionalities.", "scripting []") { LoadSubCommand("run", CommandObjectSP(new CommandObjectScriptingRun(interpreter))); diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index df906e9d7c808..4276d9e7f9c8b 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1199,7 +1199,7 @@ let Command = "thread trace dump instructions" in { def thread_trace_dump_instruction_only_events : Option<"only-events", "E">, 
Group<1>, Desc<"Dump only the events that happened during the execution of the " - "target. No instrutions are dumped.">; + "target. No instructions are dumped.">; def thread_trace_dump_instructions_continue: Option<"continue", "C">, Group<1>, Desc<"Continue dumping instructions right where the previous invocation of " diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index b93f47a8a8d5e..acd592c3bd2db 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -797,7 +797,7 @@ void CommandInterpreter::LoadCommandDictionary() { new CommandObjectRegexCommand( *this, "gdb-remote", "Connect to a process via remote GDB server.\n" - "If no host is specifed, localhost is assumed.\n" + "If no host is specified, localhost is assumed.\n" "gdb-remote is an abbreviation for 'process connect --plugin " "gdb-remote connect://:'\n", "gdb-remote [:]", 0, false)); diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp index b8a3f68a49b1c..3888a5812628c 100644 --- a/lldb/source/Interpreter/Options.cpp +++ b/lldb/source/Interpreter/Options.cpp @@ -661,7 +661,9 @@ bool Options::HandleOptionCompletion(CompletionRequest &request, } else if (opt_arg_pos == request.GetCursorIndex()) { // Okay the cursor is on the completion of an argument. See if it has a - // completion, otherwise return no matches. + // completion, otherwise return no matches. Note, opt_defs_index == -1 + // means we're after an option, but that option doesn't exist. We'll + // end up treating that as an argument. Not sure we can do much better. if (opt_defs_index != -1) { HandleOptionArgumentCompletion(request, opt_element_vector, i, interpreter); @@ -688,7 +690,6 @@ void Options::HandleOptionArgumentCompletion( int opt_defs_index = opt_element_vector[opt_element_index].opt_defs_index; // See if this is an enumeration type option, and if so complete it here: - const auto &enum_values = opt_defs[opt_defs_index].enum_values; if (!enum_values.empty()) for (const auto &enum_value : enum_values) diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp index 1dd4fd4135133..6056f3001fed6 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -60,10 +60,16 @@ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* Tagged address control register */ #endif +#ifndef NT_ARM_FPMR +#define NT_ARM_FPMR 0x40e /* Floating point mode register */ +#endif + #define HWCAP_PACA (1 << 30) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_FPMR (1UL << 48) + using namespace lldb; using namespace lldb_private; using namespace lldb_private::process_linux; @@ -139,8 +145,12 @@ NativeRegisterContextLinux::CreateHostNativeRegisterContextLinux( std::optional auxv_at_hwcap2 = process.GetAuxValue(AuxVector::AUXV_AT_HWCAP2); - if (auxv_at_hwcap2 && (*auxv_at_hwcap2 & HWCAP2_MTE)) - opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskMTE); + if (auxv_at_hwcap2) { + if (*auxv_at_hwcap2 & HWCAP2_MTE) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskMTE); + if (*auxv_at_hwcap2 & HWCAP2_FPMR) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskFPMR); + } opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskTLS); @@ -186,6 +196,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( std::fill(m_zt_reg.begin(), m_zt_reg.end(), 
0); m_mte_ctrl_reg = 0; + m_fpmr_reg = 0; // 16 is just a maximum value, query hardware for actual watchpoint count m_max_hwp_supported = 16; @@ -201,6 +212,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( m_mte_ctrl_is_valid = false; m_tls_is_valid = false; m_zt_buffer_is_valid = false; + m_fpmr_is_valid = false; // SME adds the tpidr2 register m_tls_size = GetRegisterInfo().IsSSVEPresent() ? sizeof(m_tls_regs) @@ -413,6 +425,14 @@ NativeRegisterContextLinux_arm64::ReadRegister(const RegisterInfo *reg_info, assert(offset < GetSMEPseudoBufferSize()); src = (uint8_t *)GetSMEPseudoBuffer() + offset; } + } else if (IsFPMR(reg)) { + error = ReadFPMR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset - GetRegisterInfo().GetFPMROffset(); + assert(offset < GetFPMRBufferSize()); + src = (uint8_t *)GetFPMRBuffer() + offset; } else return Status::FromErrorString( "failed - register wasn't recognized to be a GPR or an FPR, " @@ -626,6 +646,17 @@ Status NativeRegisterContextLinux_arm64::WriteRegister( } else return Status::FromErrorString( "Writing to SVG or SVCR is not supported."); + } else if (IsFPMR(reg)) { + error = ReadFPMR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset - GetRegisterInfo().GetFPMROffset(); + assert(offset < GetFPMRBufferSize()); + dst = (uint8_t *)GetFPMRBuffer() + offset; + ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size); + + return WriteFPMR(); } return Status::FromErrorString("Failed to write register value"); @@ -640,6 +671,7 @@ enum RegisterSetType : uint32_t { TLS, SME, // ZA only, because SVCR and SVG are pseudo registers. SME2, // ZT only. + FPMR, }; static uint8_t *AddRegisterSetType(uint8_t *dst, @@ -720,6 +752,13 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) { return error; } + if (GetRegisterInfo().IsFPMRPresent()) { + cached_size += sizeof(RegisterSetType) + GetFPMRBufferSize(); + error = ReadFPMR(); + if (error.Fail()) + return error; + } + // tpidr is always present but tpidr2 depends on SME. 
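For the save/restore path here: CacheAllRegisters reserves sizeof(RegisterSetType) plus the payload size for each register set that is present, and ReadAllRegisterValues then emits each set as a tag followed by its raw bytes. A rough illustration of that framing (the tag value below is a placeholder, not the real enumerator):

```python
import struct

def add_saved_registers(buf, set_type, payload):
    # A uint32 RegisterSetType tag, then the set's raw bytes.
    buf += struct.pack("<I", set_type)  # assuming a little-endian AArch64 target
    buf += payload
    return buf

FPMR_TAG = 7  # placeholder for RegisterSetType::FPMR
blob = add_saved_registers(bytearray(), FPMR_TAG, struct.pack("<Q", 0x2A00000005))
assert len(blob) == 4 + 8  # sizeof(RegisterSetType) + GetFPMRBufferSize()
```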
cached_size += sizeof(RegisterSetType) + GetTLSBufferSize(); error = ReadTLS(); @@ -823,6 +862,11 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( GetMTEControlSize()); } + if (GetRegisterInfo().IsFPMRPresent()) { + dst = AddSavedRegisters(dst, RegisterSetType::FPMR, GetFPMRBuffer(), + GetFPMRBufferSize()); + } + dst = AddSavedRegisters(dst, RegisterSetType::TLS, GetTLSBuffer(), GetTLSBufferSize()); @@ -971,6 +1015,11 @@ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues( GetZTBuffer(), &src, GetZTBufferSize(), m_zt_buffer_is_valid, std::bind(&NativeRegisterContextLinux_arm64::WriteZT, this)); break; + case RegisterSetType::FPMR: + error = RestoreRegisters( + GetFPMRBuffer(), &src, GetFPMRBufferSize(), m_fpmr_is_valid, + std::bind(&NativeRegisterContextLinux_arm64::WriteFPMR, this)); + break; } if (error.Fail()) @@ -1014,6 +1063,10 @@ bool NativeRegisterContextLinux_arm64::IsTLS(unsigned reg) const { return GetRegisterInfo().IsTLSReg(reg); } +bool NativeRegisterContextLinux_arm64::IsFPMR(unsigned reg) const { + return GetRegisterInfo().IsFPMRReg(reg); +} + llvm::Error NativeRegisterContextLinux_arm64::ReadHardwareDebugInfo() { if (!m_refresh_hwdebug_info) { return llvm::Error::success(); @@ -1161,6 +1214,7 @@ void NativeRegisterContextLinux_arm64::InvalidateAllRegisters() { m_mte_ctrl_is_valid = false; m_tls_is_valid = false; m_zt_buffer_is_valid = false; + m_fpmr_is_valid = false; // Update SVE and ZA registers in case there is change in configuration. ConfigureRegisterContext(); @@ -1440,6 +1494,40 @@ Status NativeRegisterContextLinux_arm64::WriteZT() { return WriteRegisterSet(&ioVec, GetZTBufferSize(), NT_ARM_ZT); } +Status NativeRegisterContextLinux_arm64::ReadFPMR() { + Status error; + + if (m_fpmr_is_valid) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPMRBuffer(); + ioVec.iov_len = GetFPMRBufferSize(); + + error = ReadRegisterSet(&ioVec, GetFPMRBufferSize(), NT_ARM_FPMR); + + if (error.Success()) + m_fpmr_is_valid = true; + + return error; +} + +Status NativeRegisterContextLinux_arm64::WriteFPMR() { + Status error; + + error = ReadFPMR(); + if (error.Fail()) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPMRBuffer(); + ioVec.iov_len = GetFPMRBufferSize(); + + m_fpmr_is_valid = false; + + return WriteRegisterSet(&ioVec, GetFPMRBufferSize(), NT_ARM_FPMR); +} + void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() { // ConfigureRegisterContext gets called from InvalidateAllRegisters // on every stop and configures SVE vector length and whether we are in diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h index 6df7c3beefb82..16190b5492582 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h @@ -84,6 +84,7 @@ class NativeRegisterContextLinux_arm64 bool m_sve_buffer_is_valid; bool m_mte_ctrl_is_valid; bool m_zt_buffer_is_valid; + bool m_fpmr_is_valid; bool m_sve_header_is_valid; bool m_za_buffer_is_valid; @@ -133,6 +134,8 @@ class NativeRegisterContextLinux_arm64 // SME2's ZT is a 512 bit register. std::array m_zt_reg; + uint64_t m_fpmr_reg; + bool IsGPR(unsigned reg) const; bool IsFPR(unsigned reg) const; @@ -174,11 +177,16 @@ class NativeRegisterContextLinux_arm64 // SVCR is a pseudo register and we do not allow writes to it. 
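ReadFPMR and WriteFPMR above go through the generic regset helpers, which amount to ptrace calls with the new NT_ARM_FPMR note type. For illustration only, an out-of-tree Python equivalent of the read side (standard Linux ptrace constants, minimal error handling):

```python
import ctypes
import struct

PTRACE_GETREGSET = 0x4204  # standard Linux request number
NT_ARM_FPMR = 0x40E        # note type used by the patch

class iovec(ctypes.Structure):
    _fields_ = [("iov_base", ctypes.c_void_p), ("iov_len", ctypes.c_size_t)]

libc = ctypes.CDLL(None, use_errno=True)

def read_fpmr(pid):
    # One 8-byte payload, fetched the same way ReadFPMR fills its iovec.
    buf = ctypes.create_string_buffer(8)
    iov = iovec(ctypes.cast(buf, ctypes.c_void_p), len(buf))
    if libc.ptrace(PTRACE_GETREGSET, pid, NT_ARM_FPMR, ctypes.byref(iov)) != 0:
        raise OSError(ctypes.get_errno(), "PTRACE_GETREGSET(NT_ARM_FPMR) failed")
    return struct.unpack("<Q", buf.raw)[0]
```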
Status ReadSMEControl(); + Status ReadFPMR(); + + Status WriteFPMR(); + bool IsSVE(unsigned reg) const; bool IsSME(unsigned reg) const; bool IsPAuth(unsigned reg) const; bool IsMTE(unsigned reg) const; bool IsTLS(unsigned reg) const; + bool IsFPMR(unsigned reg) const; uint64_t GetSVERegVG() { return m_sve_header.vl / 8; } @@ -202,6 +210,8 @@ class NativeRegisterContextLinux_arm64 void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); } + void *GetFPMRBuffer() { return &m_fpmr_reg; } + size_t GetSVEHeaderSize() { return sizeof(m_sve_header); } size_t GetPACMaskSize() { return sizeof(m_pac_mask); } @@ -222,6 +232,8 @@ class NativeRegisterContextLinux_arm64 size_t GetZTBufferSize() { return m_zt_reg.size(); } + size_t GetFPMRBufferSize() { return sizeof(m_fpmr_reg); } + llvm::Error ReadHardwareDebugInfo() override; llvm::Error WriteHardwareDebugRegs(DREGType hwbType) override; diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp index 50e25568f2ae0..575e9c8c81cbf 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp @@ -59,6 +59,10 @@ bool RegisterContextPOSIX_arm64::IsMTE(unsigned reg) const { return m_register_info_up->IsMTEReg(reg); } +bool RegisterContextPOSIX_arm64::IsFPMR(unsigned reg) const { + return m_register_info_up->IsFPMRReg(reg); +} + RegisterContextPOSIX_arm64::RegisterContextPOSIX_arm64( lldb_private::Thread &thread, std::unique_ptr register_info) diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h index b1226b25b4be1..35ad56c98a7ae 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h @@ -58,6 +58,7 @@ class RegisterContextPOSIX_arm64 : public lldb_private::RegisterContext { bool IsTLS(unsigned reg) const; bool IsSME(unsigned reg) const; bool IsMTE(unsigned reg) const; + bool IsFPMR(unsigned reg) const; bool IsSVEZ(unsigned reg) const { return m_register_info_up->IsSVEZReg(reg); } bool IsSVEP(unsigned reg) const { return m_register_info_up->IsSVEPReg(reg); } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 9f5872e5de7e9..f51a93e1b2dcb 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -94,6 +94,9 @@ static lldb_private::RegisterInfo g_register_infos_sme2[] = { {"zt0", nullptr, 64, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8, KIND_ALL_INVALID, nullptr, nullptr, nullptr}}; +static lldb_private::RegisterInfo g_register_infos_fpmr[] = { + DEFINE_EXTENSION_REG(fpmr)}; + // Number of register sets provided by this context. enum { k_num_gpr_registers = gpr_w28 - gpr_x0 + 1, @@ -105,6 +108,7 @@ enum { // SME2's ZT0 will also be added to this set if present. So this number is // only for SME1 registers. 
k_num_sme_register = 3, + k_num_fpmr_register = 1, k_num_register_sets_default = 2, k_num_register_sets = 3 }; @@ -214,6 +218,9 @@ static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { static const lldb_private::RegisterSet g_reg_set_sme_arm64 = { "Scalable Matrix Extension Registers", "sme", k_num_sme_register, nullptr}; +static const lldb_private::RegisterSet g_reg_set_fpmr_arm64 = { + "Floating Point Mode Register", "fpmr", k_num_fpmr_register, nullptr}; + RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) : lldb_private::RegisterInfoAndSetInterface(target_arch), @@ -263,6 +270,9 @@ RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( if (m_opt_regsets.AnySet(eRegsetMaskSSVE)) AddRegSetSME(m_opt_regsets.AnySet(eRegsetMaskZT)); + if (m_opt_regsets.AllSet(eRegsetMaskFPMR)) + AddRegSetFPMR(); + m_register_info_count = m_dynamic_reg_infos.size(); m_register_info_p = m_dynamic_reg_infos.data(); m_register_set_p = m_dynamic_reg_sets.data(); @@ -409,6 +419,21 @@ void RegisterInfoPOSIX_arm64::AddRegSetSME(bool has_zt) { m_dynamic_reg_infos[GetRegNumSVEVG()].invalidate_regs = vg_invalidates; } +void RegisterInfoPOSIX_arm64::AddRegSetFPMR() { + uint32_t fpmr_regnum = m_dynamic_reg_infos.size(); + m_fpmr_regnum_collection.push_back(fpmr_regnum); + m_dynamic_reg_infos.push_back(g_register_infos_fpmr[0]); + m_dynamic_reg_infos[fpmr_regnum].byte_offset = + m_dynamic_reg_infos[fpmr_regnum - 1].byte_offset + + m_dynamic_reg_infos[fpmr_regnum - 1].byte_size; + m_dynamic_reg_infos[fpmr_regnum].kinds[lldb::eRegisterKindLLDB] = fpmr_regnum; + + m_per_regset_regnum_range[m_register_set_count] = + std::make_pair(fpmr_regnum, fpmr_regnum + 1); + m_dynamic_reg_sets.push_back(g_reg_set_fpmr_arm64); + m_dynamic_reg_sets.back().registers = m_fpmr_regnum_collection.data(); +} + uint32_t RegisterInfoPOSIX_arm64::ConfigureVectorLengthSVE(uint32_t sve_vq) { // sve_vq contains SVE Quad vector length in context of AArch64 SVE. // SVE register infos if enabled cannot be disabled by selecting sve_vq = 0. 
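AddRegSetFPMR above follows the usual recipe for dynamic register sets: the new register is appended after the last dynamic register, its byte offset is the previous register's offset plus size, and a one-register set is registered around it. A self-contained sketch of that layout rule (field names are illustrative):

```python
def append_register(dynamic_reg_infos, name, byte_size):
    # New offset = previous register's byte_offset + byte_size, as in
    # AddRegSetFPMR; the LLDB register number is just the next index.
    prev = dynamic_reg_infos[-1]
    regnum = len(dynamic_reg_infos)
    dynamic_reg_infos.append(
        {
            "name": name,
            "byte_offset": prev["byte_offset"] + prev["byte_size"],
            "byte_size": byte_size,
            "regnum": regnum,
        }
    )
    return regnum

regs = [
    {"name": "x0", "byte_offset": 0, "byte_size": 8, "regnum": 0},
    {"name": "tpidr", "byte_offset": 8, "byte_size": 8, "regnum": 1},
]
fpmr = append_register(regs, "fpmr", 8)
assert regs[fpmr]["byte_offset"] == 16
```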
@@ -532,6 +557,10 @@ bool RegisterInfoPOSIX_arm64::IsSMEReg(unsigned reg) const { return llvm::is_contained(m_sme_regnum_collection, reg); } +bool RegisterInfoPOSIX_arm64::IsFPMRReg(unsigned reg) const { + return llvm::is_contained(m_fpmr_regnum_collection, reg); +} + uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEZ0() const { return sve_z0; } uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEFFR() const { return sve_ffr; } @@ -561,3 +590,7 @@ uint32_t RegisterInfoPOSIX_arm64::GetTLSOffset() const { uint32_t RegisterInfoPOSIX_arm64::GetSMEOffset() const { return m_register_info_p[m_sme_regnum_collection[0]].byte_offset; } + +uint32_t RegisterInfoPOSIX_arm64::GetFPMROffset() const { + return m_register_info_p[m_fpmr_regnum_collection[0]].byte_offset; +} \ No newline at end of file diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h index 3b8171042c732..16a951ef0935f 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -32,6 +32,7 @@ class RegisterInfoPOSIX_arm64 eRegsetMaskTLS = 16, eRegsetMaskZA = 32, eRegsetMaskZT = 64, + eRegsetMaskFPMR = 128, eRegsetMaskDynamic = ~1, }; @@ -110,6 +111,8 @@ class RegisterInfoPOSIX_arm64 void AddRegSetSME(bool has_zt); + void AddRegSetFPMR(); + uint32_t ConfigureVectorLengthSVE(uint32_t sve_vq); void ConfigureVectorLengthZA(uint32_t za_vq); @@ -128,6 +131,7 @@ class RegisterInfoPOSIX_arm64 bool IsPAuthPresent() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } bool IsMTEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } bool IsTLSPresent() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); } + bool IsFPMRPresent() const { return m_opt_regsets.AnySet(eRegsetMaskFPMR); } bool IsSVEReg(unsigned reg) const; bool IsSVEZReg(unsigned reg) const; @@ -139,6 +143,7 @@ class RegisterInfoPOSIX_arm64 bool IsSMEReg(unsigned reg) const; bool IsSMERegZA(unsigned reg) const; bool IsSMERegZT(unsigned reg) const; + bool IsFPMRReg(unsigned reg) const; uint32_t GetRegNumSVEZ0() const; uint32_t GetRegNumSVEFFR() const; @@ -150,6 +155,7 @@ class RegisterInfoPOSIX_arm64 uint32_t GetMTEOffset() const; uint32_t GetTLSOffset() const; uint32_t GetSMEOffset() const; + uint32_t GetFPMROffset() const; private: typedef std::map> @@ -181,6 +187,7 @@ class RegisterInfoPOSIX_arm64 std::vector m_mte_regnum_collection; std::vector m_tls_regnum_collection; std::vector m_sme_regnum_collection; + std::vector m_fpmr_regnum_collection; }; #endif diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp index f9d37490e16ae..324db3db7eb4c 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp @@ -496,6 +496,17 @@ GDBRemoteCommunicationServerCommon::Handle_qSpeedTest( return SendErrorResponse(7); } +static GDBErrno system_errno_to_gdb(int err) { + switch (err) { +#define HANDLE_ERRNO(name, value) \ + case name: \ + return GDB_##name; +#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def" + default: + return GDB_EUNKNOWN; + } +} + GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerCommon::Handle_vFile_Open( StringExtractorGDBRemote &packet) { @@ -522,9 +533,7 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open( } else { response.PutCString("-1"); 
std::error_code code = errorToErrorCode(file.takeError()); - if (code.category() == std::system_category()) { - response.Printf(",%x", code.value()); - } + response.Printf(",%x", system_errno_to_gdb(code.value())); } return SendPacketNoLock(response.GetString()); @@ -534,17 +543,6 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open( return SendErrorResponse(18); } -static GDBErrno system_errno_to_gdb(int err) { - switch (err) { -#define HANDLE_ERRNO(name, value) \ - case name: \ - return GDB_##name; -#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def" - default: - return GDB_EUNKNOWN; - } -} - GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerCommon::Handle_vFile_Close( StringExtractorGDBRemote &packet) { @@ -727,7 +725,8 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_unlink( packet.GetHexByteString(path); Status error(llvm::sys::fs::remove(path)); StreamString response; - response.Printf("F%x,%x", error.GetError(), error.GetError()); + response.Printf("F%x,%x", error.GetError(), + system_errno_to_gdb(error.GetError())); return SendPacketNoLock(response.GetString()); } diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 81ee9ea0a2fa1..518a478af5f6a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -200,6 +200,15 @@ class SWIGBridge { LLDBSwigPythonGetRepeatCommandForScriptedCommand(PyObject *implementor, std::string &command); + static StructuredData::DictionarySP + LLDBSwigPythonHandleArgumentCompletionForScriptedCommand( + PyObject *implementor, std::vector &args_impl, + size_t args_pos, size_t pos_in_arg); + + static StructuredData::DictionarySP + LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand( + PyObject *implementor, llvm::StringRef &long_option, size_t pos_in_arg); + static bool LLDBSwigPythonCallModuleInit(const char *python_module_name, const char *session_dictionary_name, lldb::DebuggerSP debugger); diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 155efc06eaf41..db1a10e73a66a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -2720,6 +2720,46 @@ ScriptInterpreterPythonImpl::GetRepeatCommandForScriptedCommand( return ret_val; } +StructuredData::DictionarySP +ScriptInterpreterPythonImpl::HandleArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, std::vector &args, + size_t args_pos, size_t char_in_arg) { + StructuredData::DictionarySP completion_dict_sp; + if (!impl_obj_sp || !impl_obj_sp->IsValid()) + return completion_dict_sp; + + { + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + + completion_dict_sp = + SWIGBridge::LLDBSwigPythonHandleArgumentCompletionForScriptedCommand( + static_cast(impl_obj_sp->GetValue()), args, args_pos, + char_in_arg); + } + return completion_dict_sp; +} + +StructuredData::DictionarySP +ScriptInterpreterPythonImpl::HandleOptionArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, llvm::StringRef &long_option, + size_t char_in_arg) { + StructuredData::DictionarySP completion_dict_sp; + if (!impl_obj_sp || !impl_obj_sp->IsValid()) + return completion_dict_sp; + + { + Locker py_lock(this, 
Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + + completion_dict_sp = SWIGBridge:: + LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand( + static_cast(impl_obj_sp->GetValue()), long_option, + char_in_arg); + } + return completion_dict_sp; +} + /// In Python, a special attribute __doc__ contains the docstring for an object /// (function, method, class, ...) if any is defined Otherwise, the attribute's /// value is None. diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index d15e2fd76f683..2dc784777151b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -166,6 +166,14 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { GetRepeatCommandForScriptedCommand(StructuredData::GenericSP impl_obj_sp, Args &args) override; + StructuredData::DictionarySP HandleArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, std::vector &args, + size_t args_pos, size_t char_in_arg) override; + + StructuredData::DictionarySP HandleOptionArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, llvm::StringRef &long_options, + size_t char_in_arg) override; + Status GenerateFunction(const char *signature, const StringList &input, bool is_callback) override; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index e1f73f1997e36..f58c6262349c6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -25,7 +25,7 @@ using namespace lldb_private::plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; m_form = dw_form_t(0); - m_value = ValueTypeTag(); + m_value = ValueType(); } bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, @@ -44,68 +44,68 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, switch (m_form) { case DW_FORM_addr: assert(m_unit); - m_value.value.uval = + m_value.uval = data.GetMaxU64(offset_ptr, DWARFUnit::GetAddressByteSize(m_unit)); break; case DW_FORM_block1: - m_value.value.uval = data.GetU8(offset_ptr); + m_value.uval = data.GetU8(offset_ptr); is_block = true; break; case DW_FORM_block2: - m_value.value.uval = data.GetU16(offset_ptr); + m_value.uval = data.GetU16(offset_ptr); is_block = true; break; case DW_FORM_block4: - m_value.value.uval = data.GetU32(offset_ptr); + m_value.uval = data.GetU32(offset_ptr); is_block = true; break; case DW_FORM_data16: - m_value.value.uval = 16; + m_value.uval = 16; is_block = true; break; case DW_FORM_exprloc: case DW_FORM_block: - m_value.value.uval = data.GetULEB128(offset_ptr); + m_value.uval = data.GetULEB128(offset_ptr); is_block = true; break; case DW_FORM_string: - m_value.value.cstr = data.GetCStr(offset_ptr); + m_value.cstr = data.GetCStr(offset_ptr); break; case DW_FORM_sdata: - m_value.value.sval = data.GetSLEB128(offset_ptr); + m_value.sval = data.GetSLEB128(offset_ptr); break; case DW_FORM_strp: case DW_FORM_line_strp: case DW_FORM_sec_offset: - m_value.value.uval = data.GetMaxU64(offset_ptr, 4); + m_value.uval = data.GetMaxU64(offset_ptr, 4); break; case DW_FORM_addrx1: case DW_FORM_strx1: case DW_FORM_ref1: case DW_FORM_data1: case DW_FORM_flag: - m_value.value.uval = data.GetU8(offset_ptr); + m_value.uval = data.GetU8(offset_ptr); 
break; case DW_FORM_addrx2: case DW_FORM_strx2: case DW_FORM_ref2: case DW_FORM_data2: - m_value.value.uval = data.GetU16(offset_ptr); + m_value.uval = data.GetU16(offset_ptr); break; case DW_FORM_addrx3: case DW_FORM_strx3: - m_value.value.uval = data.GetMaxU64(offset_ptr, 3); + m_value.uval = data.GetMaxU64(offset_ptr, 3); break; case DW_FORM_addrx4: case DW_FORM_strx4: case DW_FORM_ref4: case DW_FORM_data4: - m_value.value.uval = data.GetU32(offset_ptr); + m_value.uval = data.GetU32(offset_ptr); break; case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sig8: - m_value.value.uval = data.GetU64(offset_ptr); + m_value.uval = data.GetU64(offset_ptr); break; case DW_FORM_addrx: case DW_FORM_loclistx: @@ -115,7 +115,7 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, case DW_FORM_ref_udata: case DW_FORM_GNU_str_index: case DW_FORM_GNU_addr_index: - m_value.value.uval = data.GetULEB128(offset_ptr); + m_value.uval = data.GetULEB128(offset_ptr); break; case DW_FORM_ref_addr: assert(m_unit); @@ -123,14 +123,14 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, ref_addr_size = m_unit->GetAddressByteSize(); else ref_addr_size = 4; - m_value.value.uval = data.GetMaxU64(offset_ptr, ref_addr_size); + m_value.uval = data.GetMaxU64(offset_ptr, ref_addr_size); break; case DW_FORM_indirect: m_form = static_cast(data.GetULEB128(offset_ptr)); indirect = true; break; case DW_FORM_flag_present: - m_value.value.uval = 1; + m_value.uval = 1; break; default: return false; @@ -138,9 +138,9 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, } while (indirect); if (is_block) { - m_value.data = data.PeekData(*offset_ptr, m_value.value.uval); + m_value.data = data.PeekData(*offset_ptr, m_value.uval); if (m_value.data != nullptr) { - *offset_ptr += m_value.value.uval; + *offset_ptr += m_value.uval; } } @@ -461,23 +461,23 @@ const char *DWARFFormValue::AsCString() const { DWARFContext &context = m_unit->GetSymbolFileDWARF().GetDWARFContext(); if (m_form == DW_FORM_string) - return m_value.value.cstr; + return m_value.cstr; if (m_form == DW_FORM_strp) - return context.getOrLoadStrData().PeekCStr(m_value.value.uval); + return context.getOrLoadStrData().PeekCStr(m_value.uval); if (m_form == DW_FORM_GNU_str_index || m_form == DW_FORM_strx || m_form == DW_FORM_strx1 || m_form == DW_FORM_strx2 || m_form == DW_FORM_strx3 || m_form == DW_FORM_strx4) { std::optional offset = - m_unit->GetStringOffsetSectionItem(m_value.value.uval); + m_unit->GetStringOffsetSectionItem(m_value.uval); if (!offset) return nullptr; return context.getOrLoadStrData().PeekCStr(*offset); } if (m_form == DW_FORM_line_strp) - return context.getOrLoadLineStrData().PeekCStr(m_value.value.uval); + return context.getOrLoadLineStrData().PeekCStr(m_value.uval); return nullptr; } @@ -495,14 +495,14 @@ dw_addr_t DWARFFormValue::Address() const { uint32_t index_size = m_unit->GetAddressByteSize(); dw_offset_t addr_base = m_unit->GetAddrBase(); - lldb::offset_t offset = addr_base + m_value.value.uval * index_size; + lldb::offset_t offset = addr_base + m_value.uval * index_size; return symbol_file.GetDWARFContext().getOrLoadAddrData().GetMaxU64( &offset, index_size); } std::pair DWARFFormValue::ReferencedUnitAndOffset() const { - uint64_t value = m_value.value.uval; + uint64_t value = m_value.uval; switch (m_form) { case DW_FORM_ref1: case DW_FORM_ref2: @@ -550,7 +550,7 @@ DWARFDIE DWARFFormValue::Reference() const { } uint64_t DWARFFormValue::Reference(dw_offset_t base_offset) const { - uint64_t value = 
m_value.value.uval; + uint64_t value = m_value.uval; switch (m_form) { case DW_FORM_ref1: case DW_FORM_ref2: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index fdd5b3c278a4e..8ab9163e645fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -10,7 +10,7 @@ #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H #include "DWARFDataExtractor.h" -#include +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include namespace lldb_private::plugin { @@ -21,17 +21,7 @@ class DWARFDIE; class DWARFFormValue { public: - typedef struct ValueTypeTag { - ValueTypeTag() : value() { value.uval = 0; } - - union { - uint64_t uval; - int64_t sval; - const char *cstr; - } value; - const uint8_t *data = nullptr; - } ValueType; - + typedef llvm::DWARFFormValue::ValueType ValueType; enum { eValueTypeInvalid = 0, eValueTypeUnsigned, @@ -67,11 +57,11 @@ class DWARFFormValue { std::pair ReferencedUnitAndOffset() const; uint64_t Reference(dw_offset_t offset) const; - bool Boolean() const { return m_value.value.uval != 0; } - uint64_t Unsigned() const { return m_value.value.uval; } - void SetUnsigned(uint64_t uval) { m_value.value.uval = uval; } - int64_t Signed() const { return m_value.value.sval; } - void SetSigned(int64_t sval) { m_value.value.sval = sval; } + bool Boolean() const { return m_value.uval != 0; } + uint64_t Unsigned() const { return m_value.uval; } + void SetUnsigned(uint64_t uval) { m_value.uval = uval; } + int64_t Signed() const { return m_value.sval; } + void SetSigned(int64_t sval) { m_value.sval = sval; } const char *AsCString() const; dw_addr_t Address() const; bool IsValid() const { return m_form != 0; } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index b0f49ebf2d2cb..264b2e8411407 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -9702,7 +9702,7 @@ ScratchTypeSystemClang::GetForTarget(Target &target, lldb::eLanguageTypeC, create_on_demand); if (auto err = type_system_or_err.takeError()) { LLDB_LOG_ERROR(GetLog(LLDBLog::Target), std::move(err), - "Couldn't get scratch TypeSystemClang"); + "Couldn't get scratch TypeSystemClang: {0}"); return nullptr; } auto ts_sp = *type_system_or_err; diff --git a/lldb/source/Symbol/UnwindPlan.cpp b/lldb/source/Symbol/UnwindPlan.cpp index b5a9aa2094f54..a06e7cfd7f544 100644 --- a/lldb/source/Symbol/UnwindPlan.cpp +++ b/lldb/source/Symbol/UnwindPlan.cpp @@ -354,6 +354,17 @@ bool UnwindPlan::Row::SetRegisterLocationToSame(uint32_t reg_num, return true; } +bool UnwindPlan::Row::SetRegisterLocationToIsDWARFExpression( + uint32_t reg_num, const uint8_t *opcodes, uint32_t len, bool can_replace) { + if (!can_replace && + m_register_locations.find(reg_num) != m_register_locations.end()) + return false; + AbstractRegisterLocation reg_loc; + reg_loc.SetIsDWARFExpression(opcodes, len); + m_register_locations[reg_num] = reg_loc; + return true; +} + bool UnwindPlan::Row::SetRegisterLocationToIsConstant(uint32_t reg_num, uint64_t constant, bool can_replace) { diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 29e9efb83efeb..6123e5b9c2090 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3617,7 +3617,7 @@ void Target::FinalizeFileActions(ProcessLaunchInfo &info) { if 
(info.GetFileActionForFD(STDERR_FILENO) == nullptr) err_file_spec = GetStandardErrorPath(); - LLDB_LOG(log, "target stdin='{0}', target stdout='{1}', stderr='{1}'", + LLDB_LOG(log, "target stdin='{0}', target stdout='{1}', stderr='{2}'", in_file_spec, out_file_spec, err_file_spec); if (in_file_spec) { diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td index 0f68deb543f90..fb61478fb752d 100644 --- a/lldb/source/Target/TargetProperties.td +++ b/lldb/source/Target/TargetProperties.td @@ -235,7 +235,7 @@ let Definition = "process" in { def DisableLangRuntimeUnwindPlans: Property<"disable-language-runtime-unwindplans", "Boolean">, Global, DefaultFalse, - Desc<"If true, language runtime augmented/overidden backtraces will not be used when printing a stack trace.">; + Desc<"If true, language runtime augmented/overridden backtraces will not be used when printing a stack trace.">; def DetachKeepsStopped: Property<"detach-keeps-stopped", "Boolean">, Global, DefaultFalse, diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py index c7680e9bb7f41..6fac1eba919bc 100644 --- a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -68,6 +68,57 @@ def run_one_repeat(self, commands, expected_num_errors): return results + def handle_completion( + self, + cmd_str, + exp_num_completions, + exp_matches, + exp_descriptions, + match_description, + ): + matches = lldb.SBStringList() + descriptions = lldb.SBStringList() + + interp = self.dbg.GetCommandInterpreter() + num_completions = interp.HandleCompletionWithDescriptions( + cmd_str, len(cmd_str), 0, 1000, matches, descriptions + ) + self.assertEqual( + num_completions, exp_num_completions, "Number of completions is right." 
+ ) + num_matches = matches.GetSize() + self.assertEqual( + num_matches, + exp_matches.GetSize(), + "matches and expected matches of different lengths", + ) + num_descriptions = descriptions.GetSize() + if match_description: + self.assertEqual( + num_descriptions, + exp_descriptions.GetSize(), + "descriptions and expected of different lengths", + ) + + self.assertEqual( + matches.GetSize(), + num_completions + 1, + "The first element is the complete additional text", + ) + + for idx in range(0, num_matches): + match = matches.GetStringAtIndex(idx) + exp_match = exp_matches.GetStringAtIndex(idx) + self.assertEqual( + match, exp_match, f"{match} did not match expectation: {exp_match}" + ) + if match_description: + desc = descriptions.GetStringAtIndex(idx) + exp_desc = exp_descriptions.GetStringAtIndex(idx) + self.assertEqual( + desc, exp_desc, f"{desc} didn't match expectation: {exp_desc}" + ) + def pycmd_tests(self): source_dir = self.getSourceDir() test_file_path = os.path.join(source_dir, "test_commands.py") @@ -176,24 +227,10 @@ def cleanup(): descriptions = lldb.SBStringList() # First try an enum completion: - num_completions = interp.HandleCompletionWithDescriptions( - "no-args -e f", 12, 0, 1000, matches, descriptions - ) - self.assertEqual(num_completions, 1, "Only one completion for foo") - self.assertEqual( - matches.GetSize(), 2, "The first element is the complete additional text" - ) - self.assertEqual( - matches.GetStringAtIndex(0), "oo ", "And we got the right extra characters" - ) - self.assertEqual( - matches.GetStringAtIndex(1), "foo", "And we got the right match" - ) - self.assertEqual( - descriptions.GetSize(), 2, "descriptions matche the return length" - ) - # FIXME: we don't return descriptions for enum elements - # self.assertEqual(descriptions.GetStringAtIndex(1), "does foo things", "And we got the right description") + # Note - this is an enum so all the values are returned: + matches.AppendList(["oo ", "foo"], 2) + + self.handle_completion("no-args -e f", 1, matches, descriptions, False) # Now try an internal completer, the on disk file one is handy: partial_name = os.path.join(source_dir, "test_") @@ -201,24 +238,9 @@ def cleanup(): matches.Clear() descriptions.Clear() - num_completions = interp.HandleCompletionWithDescriptions( - cmd_str, len(cmd_str) - 1, 0, 1000, matches, descriptions - ) - self.assertEqual(num_completions, 1, "Only one completion for source file") - self.assertEqual(matches.GetSize(), 2, "The first element is the complete line") - self.assertEqual( - matches.GetStringAtIndex(0), - "commands.py' ", - "And we got the right extra characters", - ) - self.assertEqual( - matches.GetStringAtIndex(1), test_file_path, "And we got the right match" - ) - self.assertEqual( - descriptions.GetSize(), 2, "descriptions match the return length" - ) - # FIXME: we don't return descriptions for enum elements - # self.assertEqual(descriptions.GetStringAtIndex(1), "does foo things", "And we got the right description") + matches.AppendList(["commands.py' ", test_file_path], 2) + # We don't have descriptions for the file path completer: + self.handle_completion(cmd_str, 1, matches, descriptions, False) # Try a command with arguments. # FIXME: It should be enough to define an argument and it's type to get the completer @@ -231,6 +253,44 @@ def cleanup(): substrs=["0: First Argument", "1: Second Argument"], ) + # Now test custom completions - two-args has both option and arg completers. 
In both + # completers we return different values if the -p option is set, so we can test that too: + matches.Clear() + descriptions.Clear() + cmd_str = "two-args -p something -c other_" + matches.AppendString("something ") + matches.AppendString("other_something") + # This is a full match so no descriptions: + self.handle_completion(cmd_str, 1, matches, descriptions, False) + + matches.Clear() + descriptions.Clear() + cmd_str = "two-args -c other_" + matches.AppendList(["", "other_nice", "other_not_nice", "other_mediocre"], 4) + # The option doesn't return descriptions either: + self.handle_completion(cmd_str, 3, matches, descriptions, False) + + # Now try the argument - it says "no completions" if the proc_name was set: + matches.Clear() + descriptions.Clear() + cmd_str = "two-args -p something arg" + matches.AppendString("") + self.handle_completion(cmd_str, 0, matches, descriptions, False) + + cmd_str = "two-args arg_" + matches.Clear() + descriptions.Clear() + matches.AppendList(["", "arg_cool", "arg_yuck"], 3) + descriptions.AppendList(["", "good idea", "bad idea"], 3) + self.handle_completion(cmd_str, 2, matches, descriptions, True) + + # This one gets a single unique match: + cmd_str = "two-args correct_" + matches.Clear() + descriptions.Clear() + matches.AppendList(["answer ", "correct_answer"], 2) + self.handle_completion(cmd_str, 1, matches, descriptions, False) + # Now make sure get_repeat_command works properly: # no-args turns off auto-repeat diff --git a/lldb/test/API/commands/command/script/add/test_commands.py b/lldb/test/API/commands/command/script/add/test_commands.py index fcde6cd3ef6dc..b15ea935c0586 100644 --- a/lldb/test/API/commands/command/script/add/test_commands.py +++ b/lldb/test/API/commands/command/script/add/test_commands.py @@ -18,7 +18,7 @@ def __call__(self, debugger, args_array, exe_ctx, result): for long_option, elem in opt_def.items(): dest = elem["dest"] result.AppendMessage( - f"{long_option} (set: {elem['_value_set']}): {object.__getattribute__(self.ov_parser, dest)}\n" + f"{long_option} (set: {elem['_value_set']}): {object.__getattribute__(self.get_parser(), dest)}\n" ) else: result.AppendMessage("No options\n") @@ -31,7 +31,6 @@ def __call__(self, debugger, args_array, exe_ctx, result): f"{idx}: {args_array.GetItemAtIndex(idx).GetStringValue(10000)}\n" ) - # Use these to make sure that get_repeat_command sends the right # command. 
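The completion tests above all funnel through SBCommandInterpreter.HandleCompletionWithDescriptions, where matches index 0 holds the common text to append and the remaining entries are the individual candidates. The same entry point can be driven standalone; a minimal sketch (assumes the test_commands.py commands have already been imported into the debugger):

```python
import lldb

dbg = lldb.SBDebugger.Create()
interp = dbg.GetCommandInterpreter()
matches = lldb.SBStringList()
descriptions = lldb.SBStringList()

cmd = "two-args -c other_"
num = interp.HandleCompletionWithDescriptions(
    cmd, len(cmd), 0, 1000, matches, descriptions
)
for idx in range(matches.GetSize()):
    # Index 0 is the completed extra text, then one entry per candidate.
    print(idx, matches.GetStringAtIndex(idx))
```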
no_args_repeat = None @@ -49,7 +48,8 @@ def register_lldb_command(cls, debugger, module_name): ParsedCommand.do_register_cmd(cls, debugger, module_name) def setup_command_definition(self): - self.ov_parser.add_option( + ov_parser = self.get_parser() + ov_parser.add_option( "b", "bool-arg", "a boolean arg, defaults to True", @@ -59,7 +59,7 @@ def setup_command_definition(self): default=True, ) - self.ov_parser.add_option( + ov_parser.add_option( "s", "shlib-name", "A shared library name.", @@ -69,7 +69,7 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_option( + ov_parser.add_option( "d", "disk-file-name", "An on disk filename", @@ -78,7 +78,7 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_option( + ov_parser.add_option( "l", "line-num", "A line number", @@ -88,7 +88,7 @@ def setup_command_definition(self): default=0, ) - self.ov_parser.add_option( + ov_parser.add_option( "e", "enum-option", "An enum, doesn't actually do anything", @@ -126,8 +126,9 @@ def register_lldb_command(cls, debugger, module_name): ParsedCommand.do_register_cmd(cls, debugger, module_name) def setup_command_definition(self): - self.ov_parser.add_argument_set( - [self.ov_parser.make_argument_element(lldb.eArgTypeSourceFile, "plain")] + ov_parser = self.get_parser() + ov_parser.add_argument_set( + [ov_parser.make_argument_element(lldb.eArgTypeSourceFile, "plain")] ) def get_repeat_command(self, command): @@ -154,7 +155,8 @@ def register_lldb_command(cls, debugger, module_name): ParsedCommand.do_register_cmd(cls, debugger, module_name) def setup_command_definition(self): - self.ov_parser.add_option( + ov_parser = self.get_parser() + ov_parser.add_option( "l", "language", "language defaults to None", @@ -164,7 +166,7 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_option( + ov_parser.add_option( "c", "log-channel", "log channel - defaults to lldb", @@ -174,7 +176,7 @@ def setup_command_definition(self): default="lldb", ) - self.ov_parser.add_option( + ov_parser.add_option( "p", "process-name", "A process name, defaults to None", @@ -183,25 +185,23 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_argument_set( + ov_parser.add_argument_set( [ - self.ov_parser.make_argument_element( + ov_parser.make_argument_element( lldb.eArgTypeClassName, "plain", [1, 2] ), - self.ov_parser.make_argument_element( + ov_parser.make_argument_element( lldb.eArgTypeOffset, "optional", [1, 2] ), ] ) - self.ov_parser.add_argument_set( + ov_parser.add_argument_set( [ - self.ov_parser.make_argument_element( + ov_parser.make_argument_element( lldb.eArgTypePythonClass, "plain", [3, 4] ), - self.ov_parser.make_argument_element( - lldb.eArgTypePid, "optional", [3, 4] - ), + ov_parser.make_argument_element(lldb.eArgTypePid, "optional", [3, 4]), ] ) @@ -210,6 +210,35 @@ def get_repeat_command(self, command): two_arg_repeat = command return command + " THIRD_ARG" + def handle_option_argument_completion(self, long_option, cursor_pos): + ov_parser = self.get_parser() + value = ov_parser.dest_for_option(long_option)[0 : cursor_pos + 1] + proc_value = ov_parser.proc_name + if proc_value != None: + new_str = value + proc_value + ret_arr = {"completion": new_str, "mode": "partial"} + return ret_arr + + ret_arr = {"values": [value + "nice", value + "not_nice", value + "mediocre"]} + return ret_arr + + def handle_argument_completion(self, args, arg_pos, cursor_pos): + ov_parser = self.get_parser() + orig_arg = args[arg_pos][0:cursor_pos] + if orig_arg == 
"correct_": + ret_arr = {"completion": "correct_answer"} + return ret_arr + + if ov_parser.was_set("process-name"): + # No completions if proc_name was set. + return True + + ret_arr = { + "values": [orig_arg + "cool", orig_arg + "yuck"], + "descriptions": ["good idea", "bad idea"], + } + return ret_arr + def get_short_help(self): return "This is my short help string" diff --git a/lldb/test/API/functionalities/archives/Makefile b/lldb/test/API/functionalities/archives/Makefile index c4c593e6db051..4b9696e26b575 100644 --- a/lldb/test/API/functionalities/archives/Makefile +++ b/lldb/test/API/functionalities/archives/Makefile @@ -12,12 +12,10 @@ libfoo.a: a.o b.o # This tests whether lldb can load a thin archive libbar.a: c.o - $(eval LLVM_AR := $(LLVM_TOOLS_DIR)/llvm-ar) $(eval LLVM_ARFLAGS := -rcsDT) $(LLVM_AR) $(LLVM_ARFLAGS) $@ $^ libfoo-thin.a: a.o b.o - $(eval LLVM_AR := $(LLVM_TOOLS_DIR)/llvm-ar) $(eval LLVM_ARFLAGS := -rcsUT) $(LLVM_AR) $(LLVM_ARFLAGS) $@ $^ diff --git a/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py b/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py index dd9500c186b2c..3b5efb834b162 100644 --- a/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py +++ b/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py @@ -49,7 +49,7 @@ def follow_child_helper(self, use_fork, call_exec): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_vfork_no_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -59,7 +59,7 @@ def test_follow_parent_vfork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_fork_no_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-parent. @@ -69,7 +69,7 @@ def test_follow_parent_fork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_vfork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -79,7 +79,7 @@ def test_follow_parent_vfork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_fork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -89,7 +89,7 @@ def test_follow_parent_fork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_vfork_no_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child. @@ -99,7 +99,7 @@ def test_follow_child_vfork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. 
- @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_fork_no_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child. @@ -109,7 +109,7 @@ def test_follow_child_fork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_vfork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child. @@ -119,7 +119,7 @@ def test_follow_child_vfork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_fork_call_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child. diff --git a/lldb/test/API/linux/aarch64/fpmr/Makefile b/lldb/test/API/linux/aarch64/fpmr/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py new file mode 100644 index 0000000000000..5a3b8f501095e --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py @@ -0,0 +1,58 @@ +""" +Test lldb's ability to read and write the AArch64 FPMR register. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class AArch64LinuxFPMR(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_fpmr_register(self): + if not self.isAArch64FPMR(): + self.skipTest("FPMR must be present.") + + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, + "main.c", + line_number("main.c", "// Set break point at this line."), + num_expected_locations=1, + ) + + self.runCmd("run", RUN_SUCCEEDED) + + if self.process().GetState() == lldb.eStateExited: + self.fail("Test program failed to run.") + + self.expect( + "thread list", + STOPPED_DUE_TO_BREAKPOINT, + substrs=["stopped", "stop reason = breakpoint"], + ) + + # This has been set by the program. + expected_fpmr = (0b101010 << 32) | 0b101 + self.expect( + "register read --all", + substrs=["Floating Point Mode Register", f"fpmr = {expected_fpmr:#018x}"], + ) + + # Write a value for the program to find. Same fields but with bit values + # inverted. + new_fpmr = (0b010101 << 32) | 0b010 + self.runCmd(f"register write fpmr {new_fpmr:#x}") + + # This value should be saved and restored after expressions. + self.runCmd("p expr_func()") + self.expect("register read fpmr", substrs=[f"fpmr = {new_fpmr:#018x}"]) + + # 0 means the program found the new value in the sysreg as expected. 
+ self.expect("continue", substrs=["exited with status = 0"]) diff --git a/lldb/test/API/linux/aarch64/fpmr/main.c b/lldb/test/API/linux/aarch64/fpmr/main.c new file mode 100644 index 0000000000000..bdb7d8f40b64d --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/main.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#ifndef HWCAP2_FPMR +#define HWCAP2_FPMR (1UL << 48) +#endif + +uint64_t get_fpmr(void) { + uint64_t fpmr = 0; + __asm__ volatile("mrs %0, s3_3_c4_c4_2" : "=r"(fpmr)); + return fpmr; +} + +void set_fpmr(uint64_t value) { + __asm__ volatile("msr s3_3_c4_c4_2, %0" ::"r"(value)); +} + +// Set F8S1 (bits 0-2) and LSCALE2 (bits 37-32) (to prove we treat fpmr as 64 +// bit). +const uint64_t original_fpmr = (uint64_t)0b101010 << 32 | (uint64_t)0b101; + +void expr_func() { set_fpmr(original_fpmr); } + +int main(int argc, char *argv[]) { + if (!(getauxval(AT_HWCAP2) & HWCAP2_FPMR)) + return 1; + + // As FPMR controls a bunch of floating point options that are quite + // extensive, we're not going to run any floating point ops here. Instead just + // update the value from the debugger and check it from this program, and vice + // versa. + set_fpmr(original_fpmr); + + // Here the debugger checks it read back the value above, then writes in a new + // value. Note that the bits are flipped in the new value. + uint64_t new_fpmr = get_fpmr(); // Set break point at this line. + uint64_t expected_fpmr = ((uint64_t)0b010101 << 32) | (uint64_t)0b010; + + return new_fpmr == expected_fpmr ? 0 : 1; +} diff --git a/lldb/test/API/python_api/target/TestTargetAPI.py b/lldb/test/API/python_api/target/TestTargetAPI.py index 2e8d6a5b1e53f..155a25b576b03 100644 --- a/lldb/test/API/python_api/target/TestTargetAPI.py +++ b/lldb/test/API/python_api/target/TestTargetAPI.py @@ -153,6 +153,11 @@ def test_read_memory(self): self.assertSuccess(error, "Make sure memory read succeeded") self.assertEqual(len(content), 1) + # Make sure reading from 0x0 fails + sb_addr = lldb.SBAddress(0, target) + self.assertIsNone(target.ReadMemory(sb_addr, 1, error)) + self.assertTrue(error.Fail()) + @skipIfWindows # stdio manipulation unsupported on Windows @skipIfRemote # stdio manipulation unsupported on remote iOS devices @skipIf(oslist=["linux"], archs=["arm", "aarch64"]) diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py index 1082541aebcf7..ea43fccf016a7 100644 --- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py +++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py @@ -93,15 +93,18 @@ def test_readMemory(self): # We can read the complete string mem = self.dap_server.request_readMemory(memref, 0, 5)["body"] - self.assertEqual(mem["unreadableBytes"], 0) self.assertEqual(b64decode(mem["data"]), b"dead\0") + # We can read large chunks, potentially returning partial results + mem = self.dap_server.request_readMemory(memref, 0, 4096)["body"] + self.assertEqual(b64decode(mem["data"])[0:5], b"dead\0") + # Use an offset mem = self.dap_server.request_readMemory(memref, 2, 3)["body"] self.assertEqual(b64decode(mem["data"]), b"ad\0") # Reads of size 0 are successful - # VS-Code sends those in order to check if a `memoryReference` can actually be dereferenced. + # VS Code sends those in order to check if a `memoryReference` can actually be dereferenced. 
mem = self.dap_server.request_readMemory(memref, 0, 0) self.assertEqual(mem["success"], True) self.assertEqual(mem["body"]["data"], "") @@ -109,4 +112,3 @@ def test_readMemory(self): # Reads at offset 0x0 fail mem = self.dap_server.request_readMemory("0x0", 0, 6) self.assertEqual(mem["success"], False) - self.assertEqual(mem["message"], "Memory region is not readable") diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index c7653fed2def4..db4dbbd6f6200 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -4422,14 +4422,6 @@ void request_readMemory(const llvm::json::Object &request) { FillResponse(request, response); auto *arguments = request.getObject("arguments"); - lldb::SBProcess process = g_dap.target.GetProcess(); - if (!process.IsValid()) { - response["success"] = false; - response["message"] = "No process running"; - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - llvm::StringRef memoryReference = GetString(arguments, "memoryReference"); auto addr_opt = DecodeMemoryReference(memoryReference); if (!addr_opt.has_value()) { @@ -4439,57 +4431,32 @@ void request_readMemory(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - lldb::addr_t addr = *addr_opt; - - addr += GetSigned(arguments, "offset", 0); - const uint64_t requested_count = GetUnsigned(arguments, "count", 0); - lldb::SBMemoryRegionInfo region_info; - lldb::SBError memreg_error = process.GetMemoryRegionInfo(addr, region_info); - if (memreg_error.Fail()) { - response["success"] = false; - EmplaceSafeString(response, "message", - "Unable to find memory region: " + - std::string(memreg_error.GetCString())); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - if (!region_info.IsReadable()) { + lldb::addr_t addr_int = *addr_opt; + addr_int += GetSigned(arguments, "offset", 0); + const uint64_t count_requested = GetUnsigned(arguments, "count", 0); + + // We also need to support reading 0 bytes: + // VS Code sends those requests to check if a `memoryReference` + // can be dereferenced.
+ const uint64_t count_read = std::max<uint64_t>(count_requested, 1); + std::vector<uint8_t> buf; + buf.resize(count_read); + lldb::SBError error; + lldb::SBAddress addr{addr_int, g_dap.target}; + size_t count_result = + g_dap.target.ReadMemory(addr, buf.data(), count_read, error); + if (count_result == 0) { response["success"] = false; - response.try_emplace("message", "Memory region is not readable"); + EmplaceSafeString(response, "message", error.GetCString()); g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - const uint64_t available_count = - std::min(requested_count, region_info.GetRegionEnd() - addr); - const uint64_t unavailable_count = requested_count - available_count; - - std::vector<uint8_t> buf; - buf.resize(available_count); - if (available_count > 0) { - lldb::SBError memread_error; - uint64_t bytes_read = - process.ReadMemory(addr, buf.data(), available_count, memread_error); - if (memread_error.Fail()) { - response["success"] = false; - EmplaceSafeString(response, "message", - "Unable to read memory: " + - std::string(memread_error.GetCString())); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - if (bytes_read != available_count) { - response["success"] = false; - EmplaceSafeString(response, "message", "Unexpected, short read"); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - } + buf.resize(std::min(count_result, count_requested)); llvm::json::Object body; - std::string formatted_addr = "0x" + llvm::utohexstr(addr); + std::string formatted_addr = "0x" + llvm::utohexstr(addr_int); body.try_emplace("address", formatted_addr); body.try_emplace("data", llvm::encodeBase64(buf)); - body.try_emplace("unreadableBytes", unavailable_count); response.try_emplace("body", std::move(body)); g_dap.SendJSON(llvm::json::Value(std::move(response))); } diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index c67a2b4bf46e6..3faeb587c3a91 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -211,6 +211,19 @@ LLDBSwigPythonGetRepeatCommandForScriptedCommand(PyObject *implementor, return std::nullopt; } +StructuredData::DictionarySP +LLDBSwigPythonHandleArgumentCompletionForScriptedCommand( + PyObject *implementor, std::vector<llvm::StringRef> &args, size_t args_pos, + size_t pos_in_arg) { + return {}; +} + +StructuredData::DictionarySP +LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand( + PyObject *implementor, llvm::StringRef &long_options, size_t char_in_arg) { + return {}; +} + bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallModuleInit( const char *python_module_name, const char *session_dictionary_name, lldb::DebuggerSP debugger) { diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index c637febce1c1f..330db65e85cab 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -161,6 +161,12 @@ foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) endif() endforeach() +foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) + if ("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) + message(FATAL_ERROR "Runtime project \"${proj}\" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES.") + endif() +endforeach() + # Set a shorthand option to enable the GPU build of the 'libc' project.
option(LIBC_GPU_BUILD "Enable the 'libc' project targeting the GPU" OFF) if(LIBC_GPU_BUILD) diff --git a/llvm/cmake/modules/CMakeLists.txt b/llvm/cmake/modules/CMakeLists.txt index d99af79aa38e0..ef4cfa3acdb59 100644 --- a/llvm/cmake/modules/CMakeLists.txt +++ b/llvm/cmake/modules/CMakeLists.txt @@ -36,6 +36,9 @@ endif() if(omp_gen IN_LIST LLVM_COMMON_DEPENDS) list(REMOVE_ITEM LLVM_COMMON_DEPENDS omp_gen) endif() +if(vt_gen IN_LIST LLVM_COMMON_DEPENDS) + list(REMOVE_ITEM LLVM_COMMON_DEPENDS vt_gen) +endif() # # Generate LLVMConfig.cmake for the build tree. diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 7e1501a89354c..c49f10b9343ff 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -151,6 +151,9 @@ endif() if(NOT TARGET intrinsics_gen) add_custom_target(intrinsics_gen) endif() +if(NOT TARGET vt_gen) + add_custom_target(vt_gen) +endif() if(NOT TARGET omp_gen) add_custom_target(omp_gen) endif() diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index b5adb22d8f33b..321bae48594cf 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -577,7 +577,12 @@ enabled sub-projects. Nearly all of these variable names begin with The full list is: - ``clang;clang-tools-extra;cross-project-tests;libc;libclc;lld;lldb;openmp;polly;pstl`` + ``bolt;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly;pstl`` + + .. note:: + Some projects listed here can also go in ``LLVM_ENABLE_RUNTIMES``. They + should only appear in one of the two lists. If a project is a valid possibility + for both, prefer putting it in ``LLVM_ENABLE_RUNTIMES``. **LLVM_ENABLE_RTTI**:BOOL Build LLVM with run-time type information. Defaults to OFF. @@ -594,7 +599,7 @@ enabled sub-projects. Nearly all of these variable names begin with The full list is: - ``compiler-rt;libc;libcxx;libcxxabi;libunwind;openmp`` + ``libc;libunwind;libcxxabi;pstl;libcxx;compiler-rt;openmp;llvm-libgcc;offload`` To enable all of them, use: diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 91c3e60bb0acb..3b905c2788128 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15583,6 +15583,43 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +'``llvm.atan2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.atan2`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.atan2.f32(float %X, float %Y) + declare double @llvm.atan2.f64(double %X, double %Y) + declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y) + declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y) + declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y) + +Overview: +""""""""" + +The '``llvm.atan2.*``' intrinsics return the arctangent of ``%X/%Y``, using the signs of the two operands to determine the quadrant of the result. + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same type. + +Semantics: +"""""""""" + +Return the same value as a corresponding libm '``atan2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation.
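For illustration, a front end would typically emit the new intrinsic through ``IRBuilder``. A minimal sketch, not part of the patch: the helper name is hypothetical, and it assumes the ``Intrinsic::atan2`` ID introduced above.

#include "llvm/IR/IRBuilder.h"

// Hypothetical helper: emits llvm.atan2 for two operands of the same
// floating-point type. The intrinsic is overloaded on that one type, so the
// type is passed as the single overload type to CreateIntrinsic.
llvm::Value *emitAtan2(llvm::IRBuilderBase &B, llvm::Value *X, llvm::Value *Y) {
  return B.CreateIntrinsic(llvm::Intrinsic::atan2, {X->getType()}, {X, Y});
}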
+ '``llvm.sinh.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index f424004857454..d22f642865bb3 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -154,6 +154,7 @@ on support follow. ``Za64rs`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zaamo`` Assembly Support ``Zabha`` Supported + ``Zacas`` Supported (`See note <#riscv-zacas-note>`__) ``Zalrsc`` Assembly Support ``Zama16b`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zawrs`` Assembly Support @@ -287,6 +288,11 @@ Supported ``Za128rs``, ``Za64rs``, ``Zama16b``, ``Zic64b``, ``Ziccamoa``, ``Ziccif``, ``Zicclsm``, ``Ziccrse``, ``Shcounterenvw``, ``Shgatpa``, ``Shtvala``, ``Shvsatpa``, ``Shvstvala``, ``Shvstvecd``, ``Ssccptr``, ``Sscounterenw``, ``Ssstateen``, ``Ssstrict``, ``Sstvala``, ``Sstvecd``, ``Ssu64xl``, ``Svade``, ``Svbare`` These extensions are defined as part of the `RISC-V Profiles specification `__. They do not introduce any new features themselves, but instead describe existing hardware features. +.. _riscv-zacas-note: + +``Zacas`` + The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibility. These can only be used in the assembler. + Atomics ABIs ============ @@ -304,9 +310,6 @@ The primary goal of experimental support is to assist in the process of ratifica ``experimental-ssnpm``, ``experimental-smnpm``, ``experimental-smmpm``, ``experimental-sspm``, ``experimental-supm`` LLVM implements the `v1.0.0-rc2 specification `__. -``experimental-zacas`` - LLVM implements the `1.0 release specification `__. amocas.w will be used for i32 cmpxchg. amocas.d will be used i64 cmpxchg on RV64. The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibilty. These can only be used in the assembler. The extension will be left as experimental until `an ABI issue `__ is resolved. - ``experimental-zalasr`` LLVM implements the `0.0.5 draft specification `__. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 27d6bc158b3c3..05f5bd65fc5f6 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -63,12 +63,6 @@ Changes to the LLVM IR * ``llvm.nvvm.bitcast.d2ll`` * ``llvm.nvvm.bitcast.ll2d`` -* Remove the following intrinsics which can be replaced with a funnel-shift: - - * ``llvm.nvvm.rotate.b32`` - * ``llvm.nvvm.rotate.right.b64`` - * ``llvm.nvvm.rotate.b64`` - Changes to LLVM infrastructure ------------------------------ @@ -137,6 +131,7 @@ Changes to the RISC-V Backend * The ``Zvbc32e`` and ``Zvkgs`` extensions are now supported experimentally. * Added ``Smctr`` and ``Ssctr`` extensions. * ``-mcpu=syntacore-scr7`` was added. +* The ``Zacas`` extension is no longer marked as experimental. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 7039e961bff82..9cc8369a0bf52 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -1534,6 +1534,11 @@ inline APFloat maximumnum(const APFloat &A, const APFloat &B) { return A < B ? B : A; } +inline raw_ostream &operator<<(raw_ostream &OS, const APFloat &V) { + V.print(OS); + return OS; +} + // We want the following functions to be available in the header for inlining.
// We cannot define them inline in the class definition of `DoubleAPFloat` // because doing so would instantiate `std::unique_ptr<APFloat[]>` before diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 630c98504261a..56259ea7cf9d0 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -16,14 +16,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/type_traits.h" #include <cstddef> #include <functional> #include <set> -#include <type_traits> #include <utility> namespace llvm { @@ -139,10 +135,6 @@ class SmallSet { SmallVector<T, N> Vector; std::set<T, C> Set; - using VIterator = typename SmallVector<T, N>::const_iterator; - using SIterator = typename std::set<T, C>::const_iterator; - using mutable_iterator = typename SmallVector<T, N>::iterator; - // In small mode SmallPtrSet uses linear search for the elements, so it is // not a good idea to choose this value too high. You may consider using a // DenseSet<> instead if you expect many elements in the set. @@ -163,38 +155,16 @@ class SmallSet { } /// count - Return 1 if the element is in the set, 0 otherwise. - size_type count(const T &V) const { - if (isSmall()) { - // Since the collection is small, just do a linear search. - return vfind(V) == Vector.end() ? 0 : 1; - } - return Set.count(V); - } + size_type count(const T &V) const { return contains(V) ? 1 : 0; } /// insert - Insert an element into the set if it isn't already there. /// Returns a pair. The first value of it is an iterator to the inserted /// element or the existing element in the set. The second value is true /// if the element is inserted (it was not in the set before). - std::pair<const_iterator, bool> insert(const T &V) { - if (!isSmall()) { - auto [I, Inserted] = Set.insert(V); - return std::make_pair(const_iterator(I), Inserted); - } + std::pair<const_iterator, bool> insert(const T &V) { return insertImpl(V); } - VIterator I = vfind(V); - if (I != Vector.end()) // Don't reinsert if it already exists. - return std::make_pair(const_iterator(I), false); - if (Vector.size() < N) { - Vector.push_back(V); - return std::make_pair(const_iterator(std::prev(Vector.end())), true); - } - - // Otherwise, grow from vector to set. - while (!Vector.empty()) { - Set.insert(Vector.back()); - Vector.pop_back(); - } - return std::make_pair(const_iterator(Set.insert(V).first), true); + std::pair<const_iterator, bool> insert(T &&V) { + return insertImpl(std::move(V)); } template <typename IterT> @@ -206,11 +176,11 @@ class SmallSet { bool erase(const T &V) { if (!isSmall()) return Set.erase(V); - for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) { - Vector.erase(I); - return true; - } + auto I = std::find(Vector.begin(), Vector.end(), V); + if (I != Vector.end()) { + Vector.erase(I); + return true; + } return false; } @@ -234,18 +204,35 @@ /// Check if the SmallSet contains the given element. bool contains(const T &V) const { if (isSmall()) - return vfind(V) != Vector.end(); + return std::find(Vector.begin(), Vector.end(), V) != Vector.end(); return Set.find(V) != Set.end(); } private: bool isSmall() const { return Set.empty(); } - VIterator vfind(const T &V) const { - for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) - return I; - return Vector.end(); + template <typename ArgType> + std::pair<const_iterator, bool> insertImpl(ArgType &&V) { + static_assert(std::is_convertible_v<ArgType, T>, + "ArgType must be convertible to T!"); + if (!isSmall()) { + auto [I, Inserted] = Set.insert(std::forward<ArgType>(V)); + return {const_iterator(I), Inserted}; + } + + auto I = std::find(Vector.begin(), Vector.end(), V); + if (I != Vector.end()) // Don't reinsert if it already exists. + return {const_iterator(I), false}; + if (Vector.size() < N) { + Vector.push_back(std::forward<ArgType>(V)); + return {const_iterator(std::prev(Vector.end())), true}; + } + + // Otherwise, grow from vector to set. + Set.insert(std::make_move_iterator(Vector.begin()), + std::make_move_iterator(Vector.end())); + Vector.clear(); + return {const_iterator(Set.insert(std::forward<ArgType>(V)).first), true}; } }; diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h index 1d07a0c22887b..406dacd930605 100644 --- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h +++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_CMPINSTANALYSIS_H #define LLVM_ANALYSIS_CMPINSTANALYSIS_H +#include "llvm/ADT/APInt.h" #include "llvm/IR/InstrTypes.h" namespace llvm { @@ -91,12 +92,18 @@ namespace llvm { Constant *getPredForFCmpCode(unsigned Code, Type *OpTy, CmpInst::Predicate &Pred); - /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The - /// returned predicate is either == or !=. Returns false if decomposition - /// fails. - bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, - Value *&X, APInt &Mask, - bool LookThroughTrunc = true); + /// Represents the operation icmp (X & Mask) pred 0, where pred can only be + /// eq or ne. + struct DecomposedBitTest { + Value *X; + CmpInst::Predicate Pred; + APInt Mask; + }; + + /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. + std::optional<DecomposedBitTest> + decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, + bool LookThroughTrunc = true); } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h index 237d328721609..bbe2741f44fc3 100644 --- a/llvm/include/llvm/Analysis/PtrUseVisitor.h +++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h @@ -52,57 +52,54 @@ class PtrUseVisitorBase { /// analysis and whether the visit completed or aborted early. class PtrInfo { public: - PtrInfo() : AbortedInfo(nullptr, false), EscapedInfo(nullptr, false) {} - /// Reset the pointer info, clearing all state. void reset() { - AbortedInfo.setPointer(nullptr); - AbortedInfo.setInt(false); - EscapedInfo.setPointer(nullptr); - EscapedInfo.setInt(false); + AbortedInfo = nullptr; + EscapedInfo = nullptr; } /// Did we abort the visit early? - bool isAborted() const { return AbortedInfo.getInt(); } + bool isAborted() const { return AbortedInfo != nullptr; } /// Is the pointer escaped at some point? - bool isEscaped() const { return EscapedInfo.getInt(); } + bool isEscaped() const { return EscapedInfo != nullptr; } /// Get the instruction causing the visit to abort.
/// \returns a pointer to the instruction causing the abort if one is /// available; otherwise returns null. - Instruction *getAbortingInst() const { return AbortedInfo.getPointer(); } + Instruction *getAbortingInst() const { return AbortedInfo; } /// Get the instruction causing the pointer to escape. /// \returns a pointer to the instruction which escapes the pointer if one /// is available; otherwise returns null. - Instruction *getEscapingInst() const { return EscapedInfo.getPointer(); } + Instruction *getEscapingInst() const { return EscapedInfo; } /// Mark the visit as aborted. Intended for use in a void return. /// \param I The instruction which caused the visit to abort, if available. - void setAborted(Instruction *I = nullptr) { - AbortedInfo.setInt(true); - AbortedInfo.setPointer(I); + void setAborted(Instruction *I) { + assert(I && "Expected a valid pointer in setAborted"); + AbortedInfo = I; } /// Mark the pointer as escaped. Intended for use in a void return. /// \param I The instruction which escapes the pointer, if available. - void setEscaped(Instruction *I = nullptr) { - EscapedInfo.setInt(true); - EscapedInfo.setPointer(I); + void setEscaped(Instruction *I) { + assert(I && "Expected a valid pointer in setEscaped"); + EscapedInfo = I; } /// Mark the pointer as escaped, and the visit as aborted. Intended /// for use in a void return. /// \param I The instruction which both escapes the pointer and aborts the /// visit, if available. - void setEscapedAndAborted(Instruction *I = nullptr) { + void setEscapedAndAborted(Instruction *I) { setEscaped(I); setAborted(I); } private: - PointerIntPair<Instruction *, 1, bool> AbortedInfo, EscapedInfo; + Instruction *AbortedInfo = nullptr; + Instruction *EscapedInfo = nullptr; }; protected: diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index cd69a8a371b6e..89a85bc8a9086 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1371,11 +1371,15 @@ class TargetTransformInfo { /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. The \p VecPred parameter can be used to indicate the select /// is using a compare with the specified predicate as condition. When vector - /// types are passed, \p VecPred must be used for all lanes. + /// types are passed, \p VecPred must be used for all lanes. For a + /// comparison, the two operands are the natural values. For a select, the + /// two operands are the *value* operands, not the condition operand. InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + OperandValueInfo Op1Info = {OK_AnyValue, OP_None}, + OperandValueInfo Op2Info = {OK_AnyValue, OP_None}, const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract.
@@ -2049,11 +2053,11 @@ class TargetTransformInfo::Concept { virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; - virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; + virtual InstructionCost + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + OperandValueInfo Op1Info, OperandValueInfo Op2Info, + const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, @@ -2710,8 +2714,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + OperandValueInfo Op1Info, + OperandValueInfo Op2Info, const Instruction *I) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 79c8bafbc6c0d..eca8818cc25e6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -666,6 +666,8 @@ class TargetTransformInfoImplBase { InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) const { return 1; } @@ -1332,19 +1334,23 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty, CostKind, Op1Info, Op2Info, Operands, I); } + const auto Op1Info = TTI::getOperandInfo(Operands[1]); + const auto Op2Info = TTI::getOperandInfo(Operands[2]); Type *CondTy = Operands[0]->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, CmpInst::BAD_ICMP_PREDICATE, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } case Instruction::ICmp: case Instruction::FCmp: { + const auto Op1Info = TTI::getOperandInfo(Operands[0]); + const auto Op2Info = TTI::getOperandInfo(Operands[1]); Type *ValTy = Operands[0]->getType(); // TODO: Also handle ICmp/FCmp constant expressions. return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), I ? 
cast(I)->getPredicate() : CmpInst::BAD_ICMP_PREDICATE, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } case Instruction::InsertElement: { auto *IE = dyn_cast(U); diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7198e134a2d26..cb62c86b502c1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1222,10 +1222,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::getCFInstrCost(Opcode, CostKind, I); } - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1233,7 +1235,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Selects on vectors are actually vector selects. if (ISD == ISD::SELECT) { @@ -1260,8 +1262,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Num = cast(ValVTy)->getNumElements(); if (CondTy) CondTy = CondTy->getScalarType(); - InstructionCost Cost = thisT()->getCmpSelInstrCost( - Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I); + InstructionCost Cost = + thisT()->getCmpSelInstrCost(Opcode, ValVTy->getScalarType(), CondTy, + VecPred, CostKind, Op1Info, Op2Info, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. @@ -2535,7 +2538,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getNumberOfParts(Type *Tp) { std::pair LT = getTypeLegalizationCost(Tp); - return LT.first.isValid() ? *LT.first.getValue() : 0; + if (!LT.first.isValid()) + return 0; + // Try to find actual number of parts for non-power-of-2 elements as + // ceil(num-of-elements/num-of-subtype-elements). 
+ if (auto *FTp = dyn_cast(Tp); + Tp && LT.second.isFixedLengthVector() && + !has_single_bit(FTp->getNumElements())) { + if (auto *SubTp = dyn_cast_if_present( + EVT(LT.second).getTypeForEVT(Tp->getContext())); + SubTp && SubTp->getElementType() == FTp->getElementType()) + return divideCeil(FTp->getNumElements(), SubTp->getNumElements()); + } + return *LT.first.getValue(); } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index bc83f19dc581f..471a7f70dd546 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1090,6 +1090,11 @@ class LegalizationArtifactCombiner { LegalizeActionStep ActionStep = LI.getAction( {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); switch (ActionStep.Action) { + case LegalizeActions::Legal: + if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES, + {DestTy, SrcUnmergeSrcTy}})) + return false; + break; case LegalizeActions::Lower: case LegalizeActions::Unsupported: break; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 5360850deeffd..ecade6b5caed6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -394,6 +394,8 @@ class LegalizerHelper { LegalizeResult lowerRotate(MachineInstr &MI); LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); + LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI); + LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI); LegalizeResult lowerUITOFP(MachineInstr &MI); LegalizeResult lowerSITOFP(MachineInstr &MI); LegalizeResult lowerFPTOUI(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index aeb72ca24d79b..5c1da4fa762e8 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -897,13 +897,14 @@ class LLVM_ABI MachineFunction { /// for debugger use. /// \returns true if no problems were found. bool verify(Pass *p = nullptr, const char *Banner = nullptr, - bool AbortOnError = true) const; + raw_ostream *OS = nullptr, bool AbortOnError = true) const; /// Run the current MachineFunction through the machine code verifier, useful /// for debugger use. /// \returns true if no problems were found. bool verify(LiveIntervals *LiveInts, SlotIndexes *Indexes, - const char *Banner = nullptr, bool AbortOnError = true) const; + const char *Banner = nullptr, raw_ostream *OS = nullptr, + bool AbortOnError = true) const; // Provide accessors for the MachineBasicBlock list... 
using iterator = BasicBlockListType::iterator; diff --git a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h index fadbeb168b533..5e0779157473e 100644 --- a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h +++ b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h @@ -21,11 +21,9 @@ namespace dwarf_linker { template class IndexedValuesMap { public: uint64_t getValueIndex(T Value) { - typename ValueToIndexMapTy::iterator It = ValueToIndexMap.find(Value); - if (It == ValueToIndexMap.end()) { - It = ValueToIndexMap.insert(std::make_pair(Value, Values.size())).first; + auto [It, Inserted] = ValueToIndexMap.try_emplace(Value, Values.size()); + if (Inserted) Values.push_back(Value); - } return It->second; } diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 9ada4d747b1ce..56ff3cfb148f0 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2632,7 +2632,8 @@ template struct NodeKind; #include "ItaniumNodes.def" inline bool NodeArray::printAsString(OutputBuffer &OB) const { - auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + auto StartPos = OB.getCurrentPosition(); + auto Fail = [&OB, StartPos] { OB.setCurrentPosition(StartPos); return false; }; diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index b93bc594a82bf..4bdfa1cf4c149 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -916,8 +916,7 @@ bool ConstructDecompositionT::applyClause( /*ReductionIdentifiers=*/std::get(clause.t), /*List=*/objects}}); - ReductionModifier effective = - modifier.has_value() ? *modifier : ReductionModifier::Default; + ReductionModifier effective = modifier.value_or(ReductionModifier::Default); bool effectiveApplied = false; // Walk over the leaf constructs starting from the innermost, and apply // the clause as required by the spec. diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h new file mode 100644 index 0000000000000..67f9f945d748b --- /dev/null +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -0,0 +1,201 @@ +//===- ConstantFPRange.h - Represent a range for floating-point -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Represent a range of possible values that may occur when the program is run +// for a floating-point value. This keeps track of a lower and upper bound for +// the constant. +// +// Range = [Lower, Upper] U (MayBeQNaN ? QNaN : {}) U (MayBeSNaN ? SNaN : {}) +// Specifically, [inf, -inf] represents an empty set. +// Note: +// 1. Bounds are inclusive. +// 2. -0 is considered to be less than 0. That is, range [0, 0] doesn't contain +// -0. +// 3. Currently wrapping ranges are not supported. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_CONSTANTFPRANGE_H +#define LLVM_IR_CONSTANTFPRANGE_H + +#include "llvm/ADT/APFloat.h" +#include "llvm/IR/Instructions.h" +#include + +namespace llvm { + +class raw_ostream; +struct KnownFPClass; + +/// This class represents a range of floating-point values. 
+class [[nodiscard]] ConstantFPRange { + APFloat Lower, Upper; + bool MayBeQNaN : 1; + bool MayBeSNaN : 1; + + /// Create empty constant range with same semantics. + ConstantFPRange getEmpty() const { + return ConstantFPRange(getSemantics(), /*IsFullSet=*/false); + } + + /// Create full constant range with same semantics. + ConstantFPRange getFull() const { + return ConstantFPRange(getSemantics(), /*IsFullSet=*/true); + } + + void makeEmpty(); + void makeFull(); + bool isNaNOnly() const; + + /// Initialize a full or empty set for the specified semantics. + explicit ConstantFPRange(const fltSemantics &Sem, bool IsFullSet); + +public: + /// Initialize a range to hold the single specified value. + explicit ConstantFPRange(const APFloat &Value); + + /// Initialize a range of values explicitly. + /// Note: If \p LowerVal is greater than \p UpperVal, please use the canonical + /// form [Inf, -Inf]. + ConstantFPRange(APFloat LowerVal, APFloat UpperVal, bool MayBeQNaN, + bool MayBeSNaN); + + /// Create empty constant range with the given semantics. + static ConstantFPRange getEmpty(const fltSemantics &Sem) { + return ConstantFPRange(Sem, /*IsFullSet=*/false); + } + + /// Create full constant range with the given semantics. + static ConstantFPRange getFull(const fltSemantics &Sem) { + return ConstantFPRange(Sem, /*IsFullSet=*/true); + } + + /// Helper for (-inf, inf) to represent all finite values. + static ConstantFPRange getFinite(const fltSemantics &Sem); + + /// Create a range which doesn't contain NaNs. + static ConstantFPRange getNonNaN(APFloat LowerVal, APFloat UpperVal) { + return ConstantFPRange(std::move(LowerVal), std::move(UpperVal), + /*MayBeQNaN=*/false, /*MayBeSNaN=*/false); + } + + /// Create a range which may contain NaNs. + static ConstantFPRange getMayBeNaN(APFloat LowerVal, APFloat UpperVal) { + return ConstantFPRange(std::move(LowerVal), std::move(UpperVal), + /*MayBeQNaN=*/true, /*MayBeSNaN=*/true); + } + + /// Create a range which only contains NaNs. + static ConstantFPRange getNaNOnly(const fltSemantics &Sem, bool MayBeQNaN, + bool MayBeSNaN); + + /// Produce the smallest range such that all values that may satisfy the given + /// predicate with any value contained within Other is contained in the + /// returned range. Formally, this returns a superset of + /// 'union over all y in Other . { x : fcmp op x y is true }'. If the exact + /// answer is not representable as a ConstantFPRange, the return value will be + /// a proper superset of the above. + /// + /// Example: Pred = ole and Other = float [2, 5] returns Result = [-inf, 5] + static ConstantFPRange makeAllowedFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other); + + /// Produce the largest range such that all values in the returned range + /// satisfy the given predicate with all values contained within Other. + /// Formally, this returns a subset of + /// 'intersection over all y in Other . { x : fcmp op x y is true }'. If the + /// exact answer is not representable as a ConstantFPRange, the return value + /// will be a proper subset of the above. + /// + /// Example: Pred = ole and Other = float [2, 5] returns [-inf, 2] + static ConstantFPRange makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other); + + /// Produce the exact range such that all values in the returned range satisfy + /// the given predicate with any value contained within Other. 
Formally, this + /// returns the exact answer when the superset of 'union over all y in Other + /// is exactly same as the subset of intersection over all y in Other. + /// { x : fcmp op x y is true}'. + /// + /// Example: Pred = olt and Other = float 3 returns [-inf, 3) + static ConstantFPRange makeExactFCmpRegion(FCmpInst::Predicate Pred, + const APFloat &Other); + + /// Does the predicate \p Pred hold between ranges this and \p Other? + /// NOTE: false does not mean that inverse predicate holds! + bool fcmp(FCmpInst::Predicate Pred, const ConstantFPRange &Other) const; + + /// Return the lower value for this range. + const APFloat &getLower() const { return Lower; } + + /// Return the upper value for this range. + const APFloat &getUpper() const { return Upper; } + + bool containsNaN() const { return MayBeQNaN || MayBeSNaN; } + bool containsQNaN() const { return MayBeQNaN; } + bool containsSNaN() const { return MayBeSNaN; } + + /// Get the semantics of this ConstantFPRange. + const fltSemantics &getSemantics() const { return Lower.getSemantics(); } + + /// Return true if this set contains all of the elements possible + /// for this data-type. + bool isFullSet() const; + + /// Return true if this set contains no members. + bool isEmptySet() const; + + /// Return true if the specified value is in the set. + bool contains(const APFloat &Val) const; + + /// Return true if the other range is a subset of this one. + bool contains(const ConstantFPRange &CR) const; + + /// If this set contains a single element, return it, otherwise return null. + const APFloat *getSingleElement() const; + + /// Return true if this set contains exactly one member. + bool isSingleElement() const { return getSingleElement() != nullptr; } + + /// Return true if the sign bit of all values in this range is 1. + /// Return false if the sign bit of all values in this range is 0. + /// Otherwise, return std::nullopt. + std::optional getSignBit() const; + + /// Return true if this range is equal to another range. + bool operator==(const ConstantFPRange &CR) const; + /// Return true if this range is not equal to another range. + bool operator!=(const ConstantFPRange &CR) const { return !operator==(CR); } + + /// Return the FPClassTest which will return true for the value. + FPClassTest classify() const; + + /// Print out the bounds to a stream. + void print(raw_ostream &OS) const; + + /// Allow printing from a debugger easily. + void dump() const; + + /// Return the range that results from the intersection of this range with + /// another range. + ConstantFPRange intersectWith(const ConstantFPRange &CR) const; + + /// Return the range that results from the union of this range + /// with another range. The resultant range is guaranteed to include the + /// elements of both sets, but may contain more. 
+ ConstantFPRange unionWith(const ConstantFPRange &CR) const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) { + CR.print(OS); + return OS; +} + +} // end namespace llvm + +#endif // LLVM_IR_CONSTANTFPRANGE_H diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0a74a217a5f01..079ac61adef6e 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -626,7 +626,7 @@ class TypeInfoGen< list Types = !foreach(ty, AllTypes, !if(!isa(ty), ACTys[MappingRIdxs[ty.Number]], ty)); - list> TypeSig = !listconcat( + list TypeSig = !listflatten(!listconcat( [IIT_RetNumbers[!size(RetTypes)]], !foreach(i, !range(AllTypes), !foreach(a, AllTypes[i].Sig, @@ -634,7 +634,7 @@ class TypeInfoGen< MappingRIdxs, ArgCodes, ACIdxs[i], - a>.ret))); + a>.ret)))); } //===----------------------------------------------------------------------===// @@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 8ffa2d0878e11..b2a2e11240186 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3762,6 +3762,31 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; + + // + // Register scaling + // + def int_aarch64_sme_fp8_scale_single_x2 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + + def int_aarch64_sme_fp8_scale_single_x4 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + + def int_aarch64_sme_fp8_scale_x2 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + + def int_aarch64_sme_fp8_scale_x4 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index aa5294f5f9c90..737dd6092e218 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4479,6 +4479,22 @@ def int_nvvm_sust_p_3d_v4i32_trap "llvm.nvvm.sust.p.3d.v4i32.trap">, ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; + +def 
int_nvvm_rotate_b32 + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">, + ClangBuiltin<"__nvvm_rotate_b32">; + +def int_nvvm_rotate_b64 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">, + ClangBuiltin<"__nvvm_rotate_b64">; + +def int_nvvm_rotate_right_b64 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">, + ClangBuiltin<"__nvvm_rotate_right_b64">; + def int_nvvm_swap_lo_hi_b64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 558816e146587..6d4a59ba6b1f6 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -130,6 +130,10 @@ class LLVMContext { /// scope names are ordered by increasing synchronization scope IDs. void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// getSyncScopeName - Returns the name of a SyncScope::ID + /// registered with LLVMContext, if any. + std::optional getSyncScopeName(SyncScope::ID Id) const; + /// Define the GC for a function void setGC(const Function &Fn, std::string GCName); diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 2e2025c2e7b2c..7b0d81faf73d2 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -72,6 +72,8 @@ class MCTargetOptions { bool X86Sse2Avx = false; + std::optional OutputAsmVariant; + EmitDwarfUnwindType EmitDwarfUnwind; int DwarfVersion = 0; diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index d8bf292bac21a..8fabc78d81aed 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -64,7 +64,7 @@ class OptTable { // the program, HelpText is used instead. This cannot use std::vector // because OptTable is used in constexpr contexts. Increase the array sizes // here if you need more entries and adjust the constants in - // OptParserEmitter::EmitHelpTextsForVariants. + // OptionParserEmitter::EmitHelpTextsForVariants. std::array, const char *>, 1 /*MaxVisibilityHelp*/> diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 892865a0a26fe..f8121d3573251 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -7,8 +7,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/MemProfData.inc" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/HashBuilder.h" #include "llvm/Support/raw_ostream.h" #include @@ -308,24 +310,19 @@ struct Frame { << " Inline: " << IsInlineFrame << "\n"; } - // Return a hash value based on the contents of the frame. Here we don't use - // hashing from llvm ADT since we are going to persist the hash id, the hash - // combine algorithm in ADT uses a new randomized seed each time. + // Return a hash value based on the contents of the frame. Here we use a + // cryptographic hash function to minimize the chance of hash collisions. We + // do persist FrameIds as part of memprof formats up to Version 2, inclusive. + // However, the deserializer never calls this function; it uses FrameIds + // merely as keys to look up Frames proper. 
inline FrameId hash() const { - auto HashCombine = [](auto Value, size_t Seed) { - std::hash<decltype(Value)> Hasher; - // The constant used below is the 64 bit representation of the fractional - // part of the golden ratio. Used here for the randomness in their bit - // pattern. - return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2); - }; - - size_t Result = 0; - Result ^= HashCombine(Function, Result); - Result ^= HashCombine(LineOffset, Result); - Result ^= HashCombine(Column, Result); - Result ^= HashCombine(IsInlineFrame, Result); - return static_cast<FrameId>(Result); + llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> + HashBuilder; + HashBuilder.add(Function, LineOffset, Column, IsInlineFrame); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + FrameId Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; } }; diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h index beda07d7b8286..a00c21ddc7d7a 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h @@ -68,8 +68,7 @@ class PGOCtxProfContext final { CallsiteMapTy &callsites() { return Callsites; } void ingestContext(uint32_t CSId, PGOCtxProfContext &&Other) { - auto [Iter, _] = callsites().try_emplace(CSId, CallTargetMapTy()); - Iter->second.emplace(Other.guid(), std::move(Other)); + callsites()[CSId].emplace(Other.guid(), std::move(Other)); } void ingestAllContexts(uint32_t CSId, CallTargetMapTy &&Other) { diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index d5e239e70da61..d99d564ba24e5 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -346,6 +346,7 @@ class Value { friend class NoCFIValue; // For `Val`. friend class ConstantPtrAuth; // For `Val`. friend class ConstantExpr; // For `Val`. + friend class Utils; // For `Val`. // Region needs to manipulate metadata in the underlying LLVM Value, we don't // expose metadata in sandboxir. @@ -1935,6 +1936,22 @@ class Instruction : public sandboxir::User { /// \Returns this Instruction's opcode. Note that SandboxIR has its own opcode /// state to allow for new SandboxIR-specific instructions. Opcode getOpcode() const { return Opc; } + + // TODO: Missing function getOpcodeName(). + + bool isTerminator() const { + return cast<llvm::Instruction>(Val)->isTerminator(); + } + bool isUnaryOp() const { return cast<llvm::Instruction>(Val)->isUnaryOp(); } + bool isBinaryOp() const { return cast<llvm::Instruction>(Val)->isBinaryOp(); } + bool isIntDivRem() const { + return cast<llvm::Instruction>(Val)->isIntDivRem(); + } + bool isShift() const { return cast<llvm::Instruction>(Val)->isShift(); } + bool isCast() const { return cast<llvm::Instruction>(Val)->isCast(); } + + // TODO: More missing functions + /// Detach this from its parent BasicBlock without deleting it. void removeFromParent(); /// Detach this Value from its parent and delete it. diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h index ccc0030868a55..4e8a175f54705 100644 --- a/llvm/include/llvm/SandboxIR/Utils.h +++ b/llvm/include/llvm/SandboxIR/Utils.h @@ -12,6 +12,9 @@ #ifndef LLVM_SANDBOXIR_UTILS_H #define LLVM_SANDBOXIR_UTILS_H +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/SandboxIR/SandboxIR.h" + namespace llvm::sandboxir { class Utils { @@ -48,6 +51,12 @@ class Utils { Type *Ty = getExpectedType(V); return DL.getTypeSizeInBits(Ty->LLVMTy); } + + /// Equivalent to MemoryLocation::getOrNone(I).
+ static std::optional + memoryLocationGetOrNone(const Instruction *I) { + return llvm::MemoryLocation::getOrNone(cast(I->Val)); + } }; } // namespace llvm::sandboxir diff --git a/llvm/include/llvm/Support/OptionStrCmp.h b/llvm/include/llvm/Support/OptionStrCmp.h index d417fe675e292..f3d3c2adb902f 100644 --- a/llvm/include/llvm/Support/OptionStrCmp.h +++ b/llvm/include/llvm/Support/OptionStrCmp.h @@ -1,32 +1,32 @@ -//===- OptionStrCmp.h - Option String Comparison ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_OPTIONSTRCMP_H -#define LLVM_SUPPORT_OPTIONSTRCMP_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" - -namespace llvm { - -// Comparison function for Option strings (option names & prefixes). -// The ordering is *almost* case-insensitive lexicographic, with an exception. -// '\0' comes at the end of the alphabet instead of the beginning (thus options -// precede any other options which prefix them). Additionally, if two options -// are identical ignoring case, they are ordered according to case sensitive -// ordering if `FallbackCaseSensitive` is true. -int StrCmpOptionName(StringRef A, StringRef B, - bool FallbackCaseSensitive = true); - -// Comparison function for Option prefixes. -int StrCmpOptionPrefixes(ArrayRef APrefixes, - ArrayRef BPrefixes); - -} // namespace llvm - -#endif // LLVM_SUPPORT_OPTIONSTRCMP_H +//===- OptionStrCmp.h - Option String Comparison ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_OPTIONSTRCMP_H +#define LLVM_SUPPORT_OPTIONSTRCMP_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +// Comparison function for Option strings (option names & prefixes). +// The ordering is *almost* case-insensitive lexicographic, with an exception. +// '\0' comes at the end of the alphabet instead of the beginning (thus options +// precede any other options which prefix them). Additionally, if two options +// are identical ignoring case, they are ordered according to case sensitive +// ordering if `FallbackCaseSensitive` is true. +int StrCmpOptionName(StringRef A, StringRef B, + bool FallbackCaseSensitive = true); + +// Comparison function for Option prefixes. +int StrCmpOptionPrefixes(ArrayRef APrefixes, + ArrayRef BPrefixes); + +} // namespace llvm + +#endif // LLVM_SUPPORT_OPTIONSTRCMP_H diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index c2f2299ed9645..d3b411590e7fd 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -797,6 +797,30 @@ struct indent { assert(NumIndents >= N && "Indentation undeflow"); return indent(NumIndents - N, Scale); } + indent &operator++() { // Prefix ++. + ++NumIndents; + return *this; + } + indent operator++(int) { // Postfix ++. + indent Old = *this; + ++NumIndents; + return Old; + } + indent &operator--() { // Prefix --. 
+ assert(NumIndents >= 1); + --NumIndents; + return *this; + } + indent operator--(int) { // Postfix --. + indent Old = *this; + assert(NumIndents >= 1); + --NumIndents; + return Old; + } + indent &operator=(unsigned N) { + NumIndents = N; + return *this; + } }; inline raw_ostream &operator<<(raw_ostream &OS, const indent &Indent) { diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp index a1fa7857764d9..36d7aa510545a 100644 --- a/llvm/lib/Analysis/CmpInstAnalysis.cpp +++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp @@ -73,81 +73,84 @@ Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy, return nullptr; } -bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, - CmpInst::Predicate &Pred, - Value *&X, APInt &Mask, bool LookThruTrunc) { +std::optional<DecomposedBitTest> +llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, + bool LookThruTrunc) { using namespace PatternMatch; const APInt *C; if (!match(RHS, m_APIntAllowPoison(C))) - return false; + return std::nullopt; + DecomposedBitTest Result; switch (Pred) { default: - return false; + return std::nullopt; case ICmpInst::ICMP_SLT: // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SLE: // X <= -1 is equivalent to (X & SignMask) != 0. if (!C->isAllOnes()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnes()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_SGE: // X >= 0 is equivalent to (X & SignMask) == 0. if (!C->isZero()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0. if (!C->isPowerOf2()) - return false; - Mask = -*C; - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = -*C; + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULE: // X <=u 2^n-1 is equivalent to (X & ~(2^n-1)) == 0. if (!(*C + 1).isPowerOf2()) - return false; - Mask = ~*C; - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = ~*C; + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_UGT: // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0. if (!(*C + 1).isPowerOf2()) - return false; - Mask = ~*C; - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = ~*C; + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_UGE: // X >=u 2^n is equivalent to (X & ~(2^n-1)) != 0.
if (!C->isPowerOf2()) - return false; - Mask = -*C; - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = -*C; + Result.Pred = ICmpInst::ICMP_NE; break; } + Value *X; if (LookThruTrunc && match(LHS, m_Trunc(m_Value(X)))) { - Mask = Mask.zext(X->getType()->getScalarSizeInBits()); + Result.X = X; + Result.Mask = Result.Mask.zext(X->getType()->getScalarSizeInBits()); } else { - X = LHS; + Result.X = LHS; } - return true; + return Result; } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 32a9f1ab34fb3..90f05d43a2b14 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4624,13 +4624,11 @@ static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS, static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, ICmpInst::Predicate Pred, Value *TrueVal, Value *FalseVal) { - Value *X; - APInt Mask; - if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) - return nullptr; + if (auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred)) + return simplifySelectBitTest(TrueVal, FalseVal, Res->X, &Res->Mask, + Res->Pred == ICmpInst::ICMP_EQ); - return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask, - Pred == ICmpInst::ICMP_EQ); + return nullptr; } /// Try to simplify a select instruction when its condition operand is an diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67b626f300a10..b5195f764cbd1 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1015,11 +1015,12 @@ InstructionCost TargetTransformInfo::getCFInstrCost( InstructionCost TargetTransformInfo::getCmpSelInstrCost( unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, const Instruction *I) const { + TTI::TargetCostKind CostKind, OperandValueInfo Op1Info, + OperandValueInfo Op2Info, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - InstructionCost Cost = - TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + InstructionCost Cost = TTIImpl->getCmpSelInstrCost( + Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e64d3f51a0111..c3b6b3033cf5c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7169,6 +7169,78 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } +// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit +// operations and G_SITOFP +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + + // For i64 < INT_MAX we simply reuse SITOFP. + // Otherwise, divide i64 by 2, round result by ORing with the lowest bit + // saved before division, convert to float by SITOFP, multiply the result + // by 2. 
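(Editorial aside: the rounding trick described in the comment above is easier to see in plain scalar C++. The sketch below is illustrative only, assuming the default round-to-nearest float environment; it is not part of the patch.)

    // Scalar model of lowerU64ToF32WithSITOFP: convert u64 -> f32 using only a
    // signed conversion. Values below 2^63 convert exactly via SITOFP; larger
    // values are halved with the shifted-out bit folded back in (round-to-odd,
    // which preserves the sticky information), converted, then doubled.
    #include <cstdint>

    static float U64ToF32ViaSITOFP(uint64_t Src) {
      if (static_cast<int64_t>(Src) >= 0) // Src < 2^63: SITOFP handles it.
        return static_cast<float>(static_cast<int64_t>(Src));
      uint64_t RoundedHalved = (Src >> 1) | (Src & 1); // halve, keep sticky bit
      float Half = static_cast<float>(static_cast<int64_t>(RoundedHalved));
      return Half + Half; // scale back by 2
    }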
+ auto One = MIRBuilder.buildConstant(S64, 1); + auto Zero = MIRBuilder.buildConstant(S64, 0); + // Result if Src < INT_MAX + auto SmallResult = MIRBuilder.buildSITOFP(S32, Src); + // Result if Src >= INT_MAX + auto Halved = MIRBuilder.buildLShr(S64, Src, One); + auto LowerBit = MIRBuilder.buildAnd(S64, Src, One); + auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit); + auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved); + auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP); + // Check if the original value is larger than INT_MAX by comparing with + // zero to pick one of the two conversions. + auto IsLarge = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero); + MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult); + + MI.eraseFromParent(); + return Legalized; +} + +// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an +// IEEE double representation. +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64); + + // We create double value from 32 bit parts with 32 exponent difference. + // Note that + and - are float operations that adjust the implicit leading + // one, the bases 2^52 and 2^84 are for illustrative purposes. + // + // X = 2^52 * 1.0...LowBits + // Y = 2^84 * 1.0...HighBits + // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0 + // = - 2^52 * 1.0...HighBits + // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits + auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000)); + auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000)); + auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000)); + auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84); + auto HalfWidth = MIRBuilder.buildConstant(S64, 32); + + auto LowBits = MIRBuilder.buildTrunc(S32, Src); + LowBits = MIRBuilder.buildZExt(S64, LowBits); + auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits); + auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth); + auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits); + auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP); + MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); @@ -7183,13 +7255,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { if (SrcTy != LLT::scalar(64)) return UnableToLegalize; - if (DstTy == LLT::scalar(32)) { + if (DstTy == LLT::scalar(32)) // TODO: SelectionDAG has several alternative expansions to port which may - // be more reasonble depending on the available instructions. If a target - // has sitofp, does not have CTLZ, or can efficiently use f64 as an - // intermediate type, this is probably worse. - return lowerU64ToF32BitOps(MI); - } + // be more reasonable depending on the available instructions. We also need + // a more advanced mechanism to choose an optimal version depending on + // target features such as sitofp or CTLZ availability.
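(Editorial aside: the 2^52/2^84 construction in lowerU64ToF64BitFloatOps above, modeled as plain scalar C++. Illustrative only; it assumes C++20 for std::bit_cast and is not part of the patch.)

    #include <bit>
    #include <cstdint>

    static double U64ToF64BitFloat(uint64_t Src) {
      uint64_t Lo = Src & 0xFFFFFFFFu; // low 32 bits
      uint64_t Hi = Src >> 32;         // high 32 bits
      // OR-ing the halves into the mantissas of 2^52 / 2^84 yields the exact
      // doubles 2^52 + Lo and 2^84 + Hi * 2^32.
      double LoD = std::bit_cast<double>(UINT64_C(0x4330000000000000) | Lo);
      double HiD = std::bit_cast<double>(UINT64_C(0x4530000000000000) | Hi);
      // 0x4530000000100000 is 2^84 + 2^52; subtracting it cancels both
      // implicit leading ones, leaving Hi * 2^32 - 2^52 exactly, so the final
      // add rounds just once to the nearest double of Hi * 2^32 + Lo.
      return (HiD - std::bit_cast<double>(UINT64_C(0x4530000000100000))) + LoD;
    }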
+ return lowerU64ToF32WithSITOFP(MI); + + if (DstTy == LLT::scalar(64)) + return lowerU64ToF64BitFloatOps(MI); return UnableToLegalize; } diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 4ff22057b290f..ea36fedef93ac 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -160,7 +160,9 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( switch (FileType) { case CodeGenFileType::AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( - getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI); + getTargetTriple(), + Options.MCOptions.OutputAsmVariant.value_or(MAI.getAssemblerDialect()), + MAI, MII, MRI); // Create a code emitter if asked to show the encoding. std::unique_ptr<MCCodeEmitter> MCE; diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index f7d9e5c44ac2e..e325e77189a6f 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -208,7 +208,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, #ifndef NDEBUG if (MBB->pred_empty()) { - MBB->getParent()->verify(); + MBB->getParent()->verify(nullptr, nullptr, &errs()); errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo()) << " does not have a corresponding definition on every path:\n"; const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); @@ -223,7 +223,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, for (MCRegAliasIterator Alias(PhysReg, TRI, false); !IsLiveIn && Alias.isValid(); ++Alias) IsLiveIn = MBB->isLiveIn(*Alias); if (!IsLiveIn) { - MBB->getParent()->verify(); + MBB->getParent()->verify(nullptr, nullptr, &errs()); errs() << "The register " << printReg(PhysReg, TRI) << " needs to be live in to " << printMBBReference(*MBB) << ", but is missing from the live-in list.\n"; diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 8d6d800d76147..be07fbf478b1d 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -625,16 +625,16 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MRI.freezeReservedRegs(); if (computeFunctionProperties(MF, YamlMF)) - return false; + return true; if (initializeCallSiteInfo(PFS, YamlMF)) - return false; + return true; setupDebugValueTracking(MF, PFS, YamlMF); MF.getSubtarget().mirFileLoaded(MF); - MF.verify(); + MF.verify(nullptr, nullptr, &errs()); return false; } diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index be783bc4e2973..a52c82d77ca64 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -119,10 +119,10 @@ static cl::opt<unsigned> LoopToColdBlockRatio( "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden); -static cl::opt<bool> ForceLoopColdBlock( - "force-loop-cold-block", - cl::desc("Force outlining cold blocks from loops."), - cl::init(false), cl::Hidden); +static cl::opt<bool> + ForceLoopColdBlock("force-loop-cold-block", + cl::desc("Force outlining cold blocks from loops."), + cl::init(false), cl::Hidden); static cl::opt<bool> PreciseRotationCost("precise-rotation-cost", @@ -147,43 +147,43 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); static cl::opt<bool> -TailDupPlacement("tail-dup-placement", - cl::desc("Perform tail duplication during
placement. " - "Creates more fallthrough opportunites in " - "outline branches."), - cl::init(true), cl::Hidden); + TailDupPlacement("tail-dup-placement", + cl::desc("Perform tail duplication during placement. " + "Creates more fallthrough opportunites in " + "outline branches."), + cl::init(true), cl::Hidden); static cl::opt -BranchFoldPlacement("branch-fold-placement", - cl::desc("Perform branch folding during placement. " - "Reduces code size."), - cl::init(true), cl::Hidden); + BranchFoldPlacement("branch-fold-placement", + cl::desc("Perform branch folding during placement. " + "Reduces code size."), + cl::init(true), cl::Hidden); // Heuristic for tail duplication. static cl::opt TailDupPlacementThreshold( "tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " - "that won't conflict."), cl::init(2), - cl::Hidden); + "that won't conflict."), + cl::init(2), cl::Hidden); // Heuristic for aggressive tail duplication. static cl::opt TailDupPlacementAggressiveThreshold( "tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " - "have a threshold that won't conflict."), cl::init(4), - cl::Hidden); + "have a threshold that won't conflict."), + cl::init(4), cl::Hidden); // Heuristic for tail duplication. static cl::opt TailDupPlacementPenalty( "tail-dup-placement-penalty", - cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " - "Copying can increase fallthrough, but it also increases icache " - "pressure. This parameter controls the penalty to account for that. " - "Percent as integer."), - cl::init(2), - cl::Hidden); + cl::desc( + "Cost penalty for blocks that can avoid breaking CFG by copying. " + "Copying can increase fallthrough, but it also increases icache " + "pressure. This parameter controls the penalty to account for that. " + "Percent as integer."), + cl::init(2), cl::Hidden); // Heuristic for tail duplication if profile count is used in cost model. static cl::opt TailDupProfilePercentThreshold( @@ -198,8 +198,7 @@ static cl::opt TriangleChainCount( "triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), - cl::init(2), - cl::Hidden); + cl::init(2), cl::Hidden); // Use case: When block layout is visualized after MBP pass, the basic blocks // are labeled in layout order; meanwhile blocks could be numbered in a @@ -292,8 +291,8 @@ class BlockChain { iterator end() { return Blocks.end(); } const_iterator end() const { return Blocks.end(); } - bool remove(MachineBasicBlock* BB) { - for(iterator i = begin(); i != end(); ++i) { + bool remove(MachineBasicBlock *BB) { + for (iterator i = begin(); i != end(); ++i) { if (*i == BB) { Blocks.erase(i); return true; @@ -405,6 +404,8 @@ class MachineBlockPlacement : public MachineFunctionPass { ProfileSummaryInfo *PSI = nullptr; + TargetPassConfig *PassConfig = nullptr; + /// Duplicator used to duplicate tails during placement. /// /// Placement decisions can open up new tail duplication opportunities, but @@ -415,6 +416,8 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Partial tail duplication threshold. BlockFrequency DupThreshold; + unsigned TailDupSize; + /// True: use block profile count to compute tail duplication cost. 
/// False: use block frequency to compute tail duplication cost. bool UseProfileCount = false; @@ -459,26 +462,24 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Scale the DupThreshold according to basic block size. BlockFrequency scaleThreshold(MachineBasicBlock *BB); - void initDupThreshold(); + void initTailDupThreshold(); /// Decrease the UnscheduledPredecessors count for all blocks in chain, and /// if the count goes to 0, add them to the appropriate work list. - void markChainSuccessors( - const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB, - const BlockFilterSet *BlockFilter = nullptr); + void markChainSuccessors(const BlockChain &Chain, + const MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter = nullptr); /// Decrease the UnscheduledPredecessors count for a single block, and /// if the count goes to 0, add them to the appropriate work list. - void markBlockSuccessors( - const BlockChain &Chain, const MachineBasicBlock *BB, - const MachineBasicBlock *LoopHeaderBB, - const BlockFilterSet *BlockFilter = nullptr); + void markBlockSuccessors(const BlockChain &Chain, const MachineBasicBlock *BB, + const MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter = nullptr); BranchProbability - collectViableSuccessors( - const MachineBasicBlock *BB, const BlockChain &Chain, - const BlockFilterSet *BlockFilter, - SmallVector<MachineBasicBlock *, 4> &Successors); + collectViableSuccessors(const MachineBasicBlock *BB, const BlockChain &Chain, + const BlockFilterSet *BlockFilter, + SmallVector<MachineBasicBlock *, 4> &Successors); bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred, BlockFilterSet *BlockFilter); void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates, @@ -496,16 +497,19 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineFunction::iterator &PrevUnplacedBlockIt, BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, bool &DuplicatedToLPred); - bool hasBetterLayoutPredecessor( - const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - const BlockChain &SuccChain, BranchProbability SuccProb, - BranchProbability RealSuccProb, const BlockChain &Chain, - const BlockFilterSet *BlockFilter); - BlockAndTailDupResult selectBestSuccessor( - const MachineBasicBlock *BB, const BlockChain &Chain, - const BlockFilterSet *BlockFilter); - MachineBasicBlock *selectBestCandidateBlock( - const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList); + bool hasBetterLayoutPredecessor(const MachineBasicBlock *BB, + const MachineBasicBlock *Succ, + const BlockChain &SuccChain, + BranchProbability SuccProb, + BranchProbability RealSuccProb, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter); + BlockAndTailDupResult selectBestSuccessor(const MachineBasicBlock *BB, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(const BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &WorkList); MachineBasicBlock * getFirstUnplacedBlock(const BlockChain &PlacedChain, MachineFunction::iterator &PrevUnplacedBlockIt); @@ -536,20 +540,19 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop, - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopTop( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet, - BlockFrequency &ExitFreq); +
const MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopTop(const MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(const MachineLoop &L, + const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq); BlockFilterSet collectLoopBlockSet(const MachineLoop &L); void buildLoopChains(const MachineLoop &L); - void rotateLoop( - BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, - BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); - void rotateLoopWithProfile( - BlockChain &LoopChain, const MachineLoop &L, - const BlockFilterSet &LoopBlockSet); + void rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); + void rotateLoopWithProfile(BlockChain &LoopChain, const MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(); void optimizeBranches(); void alignBlocks(); @@ -558,10 +561,10 @@ class MachineBlockPlacement : public MachineFunctionPass { bool shouldTailDuplicate(MachineBasicBlock *BB); /// Check the edge frequencies to see if tail duplication will increase /// fallthroughs. - bool isProfitableToTailDup( - const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - BranchProbability QProb, - const BlockChain &Chain, const BlockFilterSet *BlockFilter); + bool isProfitableToTailDup(const MachineBasicBlock *BB, + const MachineBasicBlock *Succ, + BranchProbability QProb, const BlockChain &Chain, + const BlockFilterSet *BlockFilter); /// Check for a trellis layout. bool isTrellis(const MachineBasicBlock *BB, @@ -582,9 +585,10 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Returns true if a block can tail duplicate into all unplaced /// predecessors. Filters based on loop. - bool canTailDuplicateUnplacedPreds( - const MachineBasicBlock *BB, MachineBasicBlock *Succ, - const BlockChain &Chain, const BlockFilterSet *BlockFilter); + bool canTailDuplicateUnplacedPreds(const MachineBasicBlock *BB, + MachineBasicBlock *Succ, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter); /// Find chains of triangles to tail-duplicate where a global analysis works, /// but a local analysis would not find them. @@ -802,8 +806,8 @@ bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) { /// Compare 2 BlockFrequency's with a small penalty for \p A. /// In order to be conservative, we apply a X% penalty to account for /// increased icache pressure and static heuristics. For small frequencies -/// we use only the numerators to improve accuracy. For simplicity, we assume the -/// penalty is less than 100% +/// we use only the numerators to improve accuracy. For simplicity, we assume +/// the penalty is less than 100% /// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere. static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq) { @@ -819,8 +823,8 @@ static bool greaterWithBias(BlockFrequency A, BlockFrequency B, /// considering duplication. bool MachineBlockPlacement::isProfitableToTailDup( const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - BranchProbability QProb, - const BlockChain &Chain, const BlockFilterSet *BlockFilter) { + BranchProbability QProb, const BlockChain &Chain, + const BlockFilterSet *BlockFilter) { // We need to do a probability calculation to make sure this is profitable. // First: does succ have a successor that post-dominates? This affects the // calculation. 
The 2 relevant cases are: @@ -876,12 +880,12 @@ bool MachineBlockPlacement::isProfitableToTailDup( // from BB. auto SuccBestPred = BlockFrequency(0); for (MachineBasicBlock *SuccPred : Succ->predecessors()) { - if (SuccPred == Succ || SuccPred == BB - || BlockToChain[SuccPred] == &Chain - || (BlockFilter && !BlockFilter->count(SuccPred))) + if (SuccPred == Succ || SuccPred == BB || + BlockToChain[SuccPred] == &Chain || + (BlockFilter && !BlockFilter->count(SuccPred))) continue; - auto Freq = MBFI->getBlockFreq(SuccPred) - * MBPI->getEdgeProbability(SuccPred, Succ); + auto Freq = + MBFI->getBlockFreq(SuccPred) * MBPI->getEdgeProbability(SuccPred, Succ); if (Freq > SuccBestPred) SuccBestPred = Freq; } @@ -1137,7 +1141,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( } // We have already computed the optimal edge for the other side of the // trellis. - ComputedEdges[BestB.Src] = { BestB.Dest, false }; + ComputedEdges[BestB.Src] = {BestB.Dest, false}; auto TrellisSucc = BestA.Dest; LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability( @@ -1169,8 +1173,8 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( // Make sure all unplaced and unfiltered predecessors can be // tail-duplicated into. // Skip any blocks that are already placed or not in this loop. - if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) - || (BlockToChain[Pred] == &Chain && !Succ->succ_empty())) + if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) || + (BlockToChain[Pred] == &Chain && !Succ->succ_empty())) continue; if (!TailDup.canTailDuplicate(Succ, Pred)) { if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors)) @@ -1289,9 +1293,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { unsigned count() const { return Edges.size() - 1; } - MachineBasicBlock *getKey() const { - return Edges.back(); - } + MachineBasicBlock *getKey() const { return Edges.back(); } }; if (TriangleChainCount == 0) @@ -1326,7 +1328,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { bool CanTailDuplicate = true; // If PDom can't tail-duplicate into it's non-BB predecessors, then this // isn't the kind of triangle we're looking for. - for (MachineBasicBlock* Pred : PDom->predecessors()) { + for (MachineBasicBlock *Pred : PDom->predecessors()) { if (Pred == &BB) continue; if (!TailDup.canTailDuplicate(PDom, Pred)) { @@ -1386,8 +1388,8 @@ void MachineBlockPlacement::precomputeTriangleChains() { // When profile is not present, return the StaticLikelyProb. // When profile is available, we need to handle the triangle-shape CFG. -static BranchProbability getLayoutSuccessorProbThreshold( - const MachineBasicBlock *BB) { +static BranchProbability +getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB) { if (!BB->getParent()->getFunction().hasProfileData()) return BranchProbability(StaticLikelyProb, 100); if (BB->succ_size() == 2) { @@ -1551,8 +1553,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || - (BlockFilter && !BlockFilter->count(Pred)) || - PredChain == &Chain || Pred != *std::prev(PredChain->end()) || + (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain || + Pred != *std::prev(PredChain->end()) || // This check is redundant except for look ahead. This function is // called for lookahead by isProfitableToTailDup when BB hasn't been // placed yet. 
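(Editorial aside: a plain-integer model of the biased comparison that the greaterWithBias helper earlier in this file describes. The model's name and its exact fixed-point scaling are assumptions for illustration, not the patch's code: A only wins if its gain over B clears TailDupPlacementPenalty percent of the entry frequency.)

    // Illustrative only: prefer A over B only when A wins by a clear margin.
    static bool greaterWithBiasModel(uint64_t A, uint64_t B, uint64_t EntryFreq,
                                     unsigned PenaltyPercent = 2) {
      if (A <= B)
        return false;
      // Work on raw numerators, assuming PenaltyPercent < 100 as the helper's
      // comment states.
      return (A - B) * 100 >= EntryFreq * PenaltyPercent;
    }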
@@ -1599,12 +1601,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( /// \returns The best successor block found, or null if none are viable, along /// with a boolean indicating if tail duplication is necessary. MachineBlockPlacement::BlockAndTailDupResult -MachineBlockPlacement::selectBestSuccessor( - const MachineBasicBlock *BB, const BlockChain &Chain, - const BlockFilterSet *BlockFilter) { +MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(StaticLikelyProb, 100); - BlockAndTailDupResult BestSucc = { nullptr, false }; + BlockAndTailDupResult BestSucc = {nullptr, false}; auto BestProb = BranchProbability::getZero(); SmallVector Successors; @@ -1684,8 +1686,8 @@ MachineBlockPlacement::selectBestSuccessor( std::tie(DupProb, Succ) = Tup; if (DupProb < BestProb) break; - if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) - && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { + if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) && + (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { LLVM_DEBUG(dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " << DupProb << " (Tail Duplicate)\n"); @@ -1822,8 +1824,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( } void MachineBlockPlacement::fillWorkLists( - const MachineBasicBlock *MBB, - SmallPtrSetImpl &UpdatedPreds, + const MachineBasicBlock *MBB, SmallPtrSetImpl &UpdatedPreds, const BlockFilterSet *BlockFilter = nullptr) { BlockChain &Chain = *BlockToChain[MBB]; if (!UpdatedPreds.insert(&Chain).second) @@ -1854,9 +1855,9 @@ void MachineBlockPlacement::fillWorkLists( BlockWorkList.push_back(BB); } -void MachineBlockPlacement::buildChain( - const MachineBasicBlock *HeadBB, BlockChain &Chain, - BlockFilterSet *BlockFilter) { +void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB, + BlockChain &Chain, + BlockFilterSet *BlockFilter) { assert(HeadBB && "BB must not be null.\n"); assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); @@ -1872,16 +1873,14 @@ void MachineBlockPlacement::buildChain( assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop."); assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain."); - // Look for the best viable successor if there is one to place immediately // after this block. auto Result = selectBestSuccessor(BB, Chain, BlockFilter); - MachineBasicBlock* BestSucc = Result.BB; + MachineBasicBlock *BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; if (allowTailDupPlacement()) - ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc, - Chain, - BlockFilter)); + ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds( + BB, BestSucc, Chain, BlockFilter)); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -1918,8 +1917,8 @@ void MachineBlockPlacement::buildChain( // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; - // Zero out UnscheduledPredecessors for the successor we're about to merge in case - // we selected a successor that didn't fit naturally into the CFG. 
+ // Zero out UnscheduledPredecessors for the successor we're about to merge + // in case we selected a successor that didn't fit naturally into the CFG. SuccChain.UnscheduledPredecessors = 0; LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " << getBlockName(BestSucc) << "\n"); @@ -1946,10 +1945,8 @@ void MachineBlockPlacement::buildChain( // If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't // layout the other successor below it, so it can't reduce taken branch. // In this case we keep its original layout. -bool -MachineBlockPlacement::canMoveBottomBlockToTop( - const MachineBasicBlock *BottomBlock, - const MachineBasicBlock *OldTop) { +bool MachineBlockPlacement::canMoveBottomBlockToTop( + const MachineBasicBlock *BottomBlock, const MachineBasicBlock *OldTop) { if (BottomBlock->pred_size() != 1) return true; MachineBasicBlock *Pred = *BottomBlock->pred_begin(); @@ -1967,9 +1964,8 @@ MachineBlockPlacement::canMoveBottomBlockToTop( // Find out the possible fall through frequence to the top of a loop. BlockFrequency -MachineBlockPlacement::TopFallThroughFreq( - const MachineBasicBlock *Top, - const BlockFilterSet &LoopBlockSet) { +MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet) { BlockFrequency MaxFreq = BlockFrequency(0); for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; @@ -1991,8 +1987,8 @@ MachineBlockPlacement::TopFallThroughFreq( } } if (TopOK) { - BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * - MBPI->getEdgeProbability(Pred, Top); + BlockFrequency EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Top); if (EdgeFreq > MaxFreq) MaxFreq = EdgeFreq; } @@ -2022,19 +2018,16 @@ MachineBlockPlacement::TopFallThroughFreq( // |- // V // -BlockFrequency -MachineBlockPlacement::FallThroughGains( - const MachineBasicBlock *NewTop, - const MachineBasicBlock *OldTop, - const MachineBasicBlock *ExitBB, - const BlockFilterSet &LoopBlockSet) { +BlockFrequency MachineBlockPlacement::FallThroughGains( + const MachineBasicBlock *NewTop, const MachineBasicBlock *OldTop, + const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) { BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet); BlockFrequency FallThrough2Exit = BlockFrequency(0); if (ExitBB) - FallThrough2Exit = MBFI->getBlockFreq(NewTop) * - MBPI->getEdgeProbability(NewTop, ExitBB); - BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) * - MBPI->getEdgeProbability(NewTop, OldTop); + FallThrough2Exit = + MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB); + BlockFrequency BackEdgeFreq = + MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, OldTop); // Find the best Pred of NewTop. MachineBasicBlock *BestPred = nullptr; @@ -2113,10 +2106,8 @@ MachineBlockPlacement::FallThroughGains( /// At the same time, move it before old top increases the taken branch /// to loop exit block, so the reduced taken branch will be compared with /// the increased taken branch to the loop exit block. -MachineBasicBlock * -MachineBlockPlacement::findBestLoopTopHelper( - MachineBasicBlock *OldTop, - const MachineLoop &L, +MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper( + MachineBasicBlock *OldTop, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // Check that the header hasn't been fused with a preheader block due to // crazy branches. 
If it has, we need to start with the header at the top to @@ -2153,8 +2144,8 @@ MachineBlockPlacement::findBestLoopTopHelper( if (!canMoveBottomBlockToTop(Pred, OldTop)) continue; - BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, - LoopBlockSet); + BlockFrequency Gains = + FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet); if ((Gains > BlockFrequency(0)) && (Gains > BestGains || ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { @@ -2204,7 +2195,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, OldTop = NewTop; NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet); if (NewTop != OldTop) - ComputedEdges[NewTop] = { OldTop, false }; + ComputedEdges[NewTop] = {OldTop, false}; } return NewTop; } @@ -2336,10 +2327,8 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, /// /// 1. Look for a Pred that can be layout before Top. /// 2. Check if Top is the most possible successor of Pred. -bool -MachineBlockPlacement::hasViableTopFallthrough( - const MachineBasicBlock *Top, - const BlockFilterSet &LoopBlockSet) { +bool MachineBlockPlacement::hasViableTopFallthrough( + const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) { for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && @@ -2491,7 +2480,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( if (!LoopBlockSet.count(Pred) && (!PredChain || Pred == *std::prev(PredChain->end()))) { auto EdgeFreq = MBFI->getBlockFreq(Pred) * - MBPI->getEdgeProbability(Pred, ChainHeaderBB); + MBPI->getEdgeProbability(Pred, ChainHeaderBB); auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); // If the predecessor has only an unconditional jump to the header, we // need to consider the cost of this jump. @@ -2951,12 +2940,16 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction().hasMinSize() || - (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize())) - return; + if (!AlignAllBlock && !AlignAllNonFallThruBlocks) { + if (F->getFunction().hasMinSize() || + (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize())) + return; + } + BlockChain &FunctionChain = *BlockToChain[&F->front()]; + // Empty chain. if (FunctionChain.begin() == FunctionChain.end()) - return; // Empty chain. + return; const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front()); @@ -3052,6 +3045,33 @@ void MachineBlockPlacement::alignBlocks() { DetermineMaxAlignmentPadding(); } } + + const bool HasMaxBytesOverride = + MaxBytesForAlignmentOverride.getNumOccurrences() > 0; + + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineBasicBlock &MBB : *F) { + if (HasMaxBytesOverride) + MBB.setAlignment(Align(1ULL << AlignAllBlock), + MaxBytesForAlignmentOverride); + else + MBB.setAlignment(Align(1ULL << AlignAllBlock)); + } + else if (AlignAllNonFallThruBlocks) { + // Align all of the blocks that have no fall-through predecessors to a + // specific alignment. 
+ for (auto MBI = std::next(F->begin()), MBE = F->end(); MBI != MBE; ++MBI) { + auto LayoutPred = std::prev(MBI); + if (!LayoutPred->isSuccessor(&*MBI)) { + if (HasMaxBytesOverride) + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks), + MaxBytesForAlignmentOverride); + else + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); + } + } + } } /// Tail duplicate \p BB into (some) predecessors if profitable, repeating if @@ -3142,67 +3162,66 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( // This has to be a callback because none of it can be done after // BB is deleted. bool Removed = false; - auto RemovalCallback = - [&](MachineBasicBlock *RemBB) { - // Signal to outer function - Removed = true; - - // Conservative default. - bool InWorkList = true; - // Remove from the Chain and Chain Map - if (BlockToChain.count(RemBB)) { - BlockChain *Chain = BlockToChain[RemBB]; - InWorkList = Chain->UnscheduledPredecessors == 0; - Chain->remove(RemBB); - BlockToChain.erase(RemBB); - } - - // Handle the unplaced block iterator - if (&(*PrevUnplacedBlockIt) == RemBB) { - PrevUnplacedBlockIt++; - } - - // Handle the Work Lists - if (InWorkList) { - SmallVectorImpl &RemoveList = BlockWorkList; - if (RemBB->isEHPad()) - RemoveList = EHPadWorkList; - llvm::erase(RemoveList, RemBB); - } - - // Handle the filter set - if (BlockFilter) { - auto It = llvm::find(*BlockFilter, RemBB); - // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt - // pointing to the same element as before. - if (It != BlockFilter->end()) { - if (It < PrevUnplacedBlockInFilterIt) { - const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt; - // BlockFilter is a SmallVector so all elements after RemBB are - // shifted to the front by 1 after its deletion. - auto Distance = PrevUnplacedBlockInFilterIt - It - 1; - PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance; - assert(*PrevUnplacedBlockInFilterIt == PrevBB); - (void)PrevBB; - } else if (It == PrevUnplacedBlockInFilterIt) - // The block pointed by PrevUnplacedBlockInFilterIt is erased, we - // have to set it to the next element. - PrevUnplacedBlockInFilterIt = BlockFilter->erase(It); - else - BlockFilter->erase(It); - } - } + auto RemovalCallback = [&](MachineBasicBlock *RemBB) { + // Signal to outer function + Removed = true; + + // Conservative default. + bool InWorkList = true; + // Remove from the Chain and Chain Map + if (BlockToChain.count(RemBB)) { + BlockChain *Chain = BlockToChain[RemBB]; + InWorkList = Chain->UnscheduledPredecessors == 0; + Chain->remove(RemBB); + BlockToChain.erase(RemBB); + } + + // Handle the unplaced block iterator + if (&(*PrevUnplacedBlockIt) == RemBB) { + PrevUnplacedBlockIt++; + } + + // Handle the Work Lists + if (InWorkList) { + SmallVectorImpl &RemoveList = BlockWorkList; + if (RemBB->isEHPad()) + RemoveList = EHPadWorkList; + llvm::erase(RemoveList, RemBB); + } + + // Handle the filter set + if (BlockFilter) { + auto It = llvm::find(*BlockFilter, RemBB); + // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt + // pointing to the same element as before. + if (It != BlockFilter->end()) { + if (It < PrevUnplacedBlockInFilterIt) { + const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt; + // BlockFilter is a SmallVector so all elements after RemBB are + // shifted to the front by 1 after its deletion. 
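(Editorial aside: a concrete trace of the iterator bookkeeping that follows; the block names are made up.)

    // BlockFilter = {B0, B1, B2, B3}, It -> B1 (index 1),
    // PrevUnplacedBlockInFilterIt -> B3 (index 3).
    //   Distance = 3 - 1 - 1 = 1
    //   BlockFilter->erase(It) returns an iterator to index 1 (now B2);
    //   adding Distance lands on index 2, which is B3 after the shift.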
+ auto Distance = PrevUnplacedBlockInFilterIt - It - 1; + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance; + assert(*PrevUnplacedBlockInFilterIt == PrevBB); + (void)PrevBB; + } else if (It == PrevUnplacedBlockInFilterIt) + // The block pointed by PrevUnplacedBlockInFilterIt is erased, we + // have to set it to the next element. + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It); + else + BlockFilter->erase(It); + } + } - // Remove the block from loop info. - MLI->removeBlock(RemBB); - if (RemBB == PreferredLoopExit) - PreferredLoopExit = nullptr; + // Remove the block from loop info. + MLI->removeBlock(RemBB); + if (RemBB == PreferredLoopExit) + PreferredLoopExit = nullptr; - LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: " - << getBlockName(RemBB) << "\n"); - }; + LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: " << getBlockName(RemBB) + << "\n"); + }; auto RemovalCallbackRef = - function_ref(RemovalCallback); + function_ref(RemovalCallback); SmallVector DuplicatedPreds; bool IsSimple = TailDup.isSimpleBB(BB); @@ -3223,11 +3242,11 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( DuplicatedToLPred = false; for (MachineBasicBlock *Pred : DuplicatedPreds) { // We're only looking for unscheduled predecessors that match the filter. - BlockChain* PredChain = BlockToChain[Pred]; + BlockChain *PredChain = BlockToChain[Pred]; if (Pred == LPred) DuplicatedToLPred = true; - if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) - || PredChain == &Chain) + if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) || + PredChain == &Chain) continue; for (MachineBasicBlock *NewSucc : Pred->successors()) { if (BlockFilter && !BlockFilter->count(NewSucc)) @@ -3297,8 +3316,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB, // Find out the predecessors of BB and BB can be beneficially duplicated into // them. void MachineBlockPlacement::findDuplicateCandidates( - SmallVectorImpl &Candidates, - MachineBasicBlock *BB, + SmallVectorImpl &Candidates, MachineBasicBlock *BB, BlockFilterSet *BlockFilter) { MachineBasicBlock *Fallthrough = nullptr; BranchProbability DefaultBranchProb = BranchProbability::getZero(); @@ -3407,31 +3425,53 @@ void MachineBlockPlacement::findDuplicateCandidates( } } -void MachineBlockPlacement::initDupThreshold() { +void MachineBlockPlacement::initTailDupThreshold() { DupThreshold = BlockFrequency(0); - if (!F->getFunction().hasProfileData()) - return; + if (F->getFunction().hasProfileData()) { + // We prefer to use prifile count. + uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); + if (HotThreshold != UINT64_MAX) { + UseProfileCount = true; + DupThreshold = + BlockFrequency(HotThreshold * TailDupProfilePercentThreshold / 100); + } else { + // Profile count is not available, we can use block frequency instead. + BlockFrequency MaxFreq = BlockFrequency(0); + for (MachineBasicBlock &MBB : *F) { + BlockFrequency Freq = MBFI->getBlockFreq(&MBB); + if (Freq > MaxFreq) + MaxFreq = Freq; + } - // We prefer to use prifile count. - uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); - if (HotThreshold != UINT64_MAX) { - UseProfileCount = true; - DupThreshold = - BlockFrequency(HotThreshold * TailDupProfilePercentThreshold / 100); - return; + BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); + DupThreshold = BlockFrequency(MaxFreq * ThresholdProb); + UseProfileCount = false; + } } - // Profile count is not available, we can use block frequency instead. 
- BlockFrequency MaxFreq = BlockFrequency(0); - for (MachineBasicBlock &MBB : *F) { - BlockFrequency Freq = MBFI->getBlockFreq(&MBB); - if (Freq > MaxFreq) - MaxFreq = Freq; + TailDupSize = TailDupPlacementThreshold; + // If only the aggressive threshold is explicitly set, use it. + if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 && + TailDupPlacementThreshold.getNumOccurrences() == 0) + TailDupSize = TailDupPlacementAggressiveThreshold; + + // For aggressive optimization, we can adjust some thresholds to be less + // conservative. + if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) { + // At O3 we should be more willing to copy blocks for tail duplication. This + // increases size pressure, so we only do it at O3 + // Do this unless only the regular threshold is explicitly set. + if (TailDupPlacementThreshold.getNumOccurrences() == 0 || + TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0) + TailDupSize = TailDupPlacementAggressiveThreshold; } - BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); - DupThreshold = BlockFrequency(MaxFreq * ThresholdProb); - UseProfileCount = false; + // If there's no threshold provided through options, query the target + // information for a threshold instead. + if (TailDupPlacementThreshold.getNumOccurrences() == 0 && + (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive || + TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) + TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { @@ -3451,8 +3491,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getSubtarget().getTargetLowering(); MPDT = nullptr; PSI = &getAnalysis().getPSI(); - - initDupThreshold(); + PassConfig = &getAnalysis(); // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. @@ -3463,38 +3502,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { assert(ComputedEdges.empty() && "Computed Edge map should be empty before starting placement."); - unsigned TailDupSize = TailDupPlacementThreshold; - // If only the aggressive threshold is explicitly set, use it. - if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 && - TailDupPlacementThreshold.getNumOccurrences() == 0) - TailDupSize = TailDupPlacementAggressiveThreshold; - - TargetPassConfig *PassConfig = &getAnalysis(); - // For aggressive optimization, we can adjust some thresholds to be less - // conservative. - if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) { - // At O3 we should be more willing to copy blocks for tail duplication. This - // increases size pressure, so we only do it at O3 - // Do this unless only the regular threshold is explicitly set. - if (TailDupPlacementThreshold.getNumOccurrences() == 0 || - TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0) - TailDupSize = TailDupPlacementAggressiveThreshold; - } - - // If there's no threshold provided through options, query the target - // information for a threshold instead. - if (TailDupPlacementThreshold.getNumOccurrences() == 0 && - (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive || - TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) - TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); + // Initialize tail duplication thresholds. + initTailDupThreshold(); + // Apply tail duplication. 
if (allowTailDupPlacement()) { MPDT = &getAnalysis().getPostDomTree(); bool OptForSize = MF.getFunction().hasOptSize() || llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); if (OptForSize) TailDupSize = 1; - bool PreRegAlloc = false; + const bool PreRegAlloc = false; TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); @@ -3505,12 +3523,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // Changing the layout can create new tail merging opportunities. // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structured CFG. - bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && - PassConfig->getEnableTailMerge() && - BranchFoldPlacement; + const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && + PassConfig->getEnableTailMerge() && + BranchFoldPlacement && MF.size() > 3; // No tail merging opportunities if the block number is less than four. - if (MF.size() > 3 && EnableTailMerge) { - unsigned TailMergeSize = TailDupSize + 1; + if (EnableTailMerge) { + const unsigned TailMergeSize = TailDupSize + 1; BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, PSI, TailMergeSize); @@ -3545,32 +3563,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { ComputedEdges.clear(); ChainAllocator.DestroyAll(); - bool HasMaxBytesOverride = - MaxBytesForAlignmentOverride.getNumOccurrences() > 0; - - if (AlignAllBlock) - // Align all of the blocks in the function to a specific alignment. - for (MachineBasicBlock &MBB : MF) { - if (HasMaxBytesOverride) - MBB.setAlignment(Align(1ULL << AlignAllBlock), - MaxBytesForAlignmentOverride); - else - MBB.setAlignment(Align(1ULL << AlignAllBlock)); - } - else if (AlignAllNonFallThruBlocks) { - // Align all of the blocks that have no fall-through predecessors to a - // specific alignment. - for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) { - auto LayoutPred = std::prev(MBI); - if (!LayoutPred->isSuccessor(&*MBI)) { - if (HasMaxBytesOverride) - MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks), - MaxBytesForAlignmentOverride); - else - MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); - } - } - } + // View the function. if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || F->getFunction().getName() == ViewBlockFreqFuncName)) { @@ -3705,7 +3698,7 @@ void MachineBlockPlacement::assignBlockOrder( #ifndef NDEBUG // Make sure we correctly constructed all branches. 
- F->verify(this, "After optimized block reordering"); + F->verify(this, "After optimized block reordering", &errs()); #endif } diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 4e6d34346b1d8..9b2862de22b69 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -453,7 +453,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (VerifyScheduling) { LLVM_DEBUG(LIS->dump()); - MF->verify(this, "Before machine scheduling."); + MF->verify(this, "Before machine scheduling.", &errs()); } RegClassInfo->runOnMachineFunction(*MF); @@ -472,7 +472,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(LIS->dump()); if (VerifyScheduling) - MF->verify(this, "After machine scheduling."); + MF->verify(this, "After machine scheduling.", &errs()); return true; } @@ -496,7 +496,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { AA = &getAnalysis().getAAResults(); if (VerifyScheduling) - MF->verify(this, "Before post machine scheduling."); + MF->verify(this, "Before post machine scheduling.", &errs()); // Instantiate the selected scheduler for this target, function, and // optimization level. @@ -512,7 +512,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { scheduleRegions(*Scheduler, true); if (VerifyScheduling) - MF->verify(this, "After post machine scheduling."); + MF->verify(this, "After post machine scheduling.", &errs()); return true; } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 609f9af9767f5..658ebd47488c7 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -2152,8 +2152,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineBasicBlock::iterator InsertPos = SuccBB->SkipPHIsAndLabels(SuccBB->begin()); if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) { - LLVM_DEBUG( - dbgs() << " *** Not sinking: prologue interference\n"); + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); + LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n"); continue; } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index e1295ec8ea6e9..24a0f41775cc1 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -94,21 +94,24 @@ using namespace llvm; namespace { struct MachineVerifier { - MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b) - : MFAM(&MFAM), Banner(b) {} + MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b, + raw_ostream *OS) + : MFAM(&MFAM), OS(OS ? *OS : nulls()), Banner(b) {} - MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {} + MachineVerifier(Pass *pass, const char *b, raw_ostream *OS) + : PASS(pass), OS(OS ? *OS : nulls()), Banner(b) {} MachineVerifier(const char *b, LiveVariables *LiveVars, LiveIntervals *LiveInts, LiveStacks *LiveStks, - SlotIndexes *Indexes) - : Banner(b), LiveVars(LiveVars), LiveInts(LiveInts), LiveStks(LiveStks), - Indexes(Indexes) {} + SlotIndexes *Indexes, raw_ostream *OS) + : OS(OS ? 
*OS : nulls()), Banner(b), LiveVars(LiveVars), + LiveInts(LiveInts), LiveStks(LiveStks), Indexes(Indexes) {} unsigned verify(const MachineFunction &MF); MachineFunctionAnalysisManager *MFAM = nullptr; Pass *const PASS = nullptr; + raw_ostream &OS; const char *Banner; const MachineFunction *MF = nullptr; const TargetMachine *TM = nullptr; @@ -334,7 +337,8 @@ namespace { MachineFunctionProperties::Property::FailsVerification)) return false; - unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF); + unsigned FoundErrors = + MachineVerifier(this, Banner.c_str(), &errs()).verify(MF); if (FoundErrors) report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors."); return false; @@ -352,7 +356,8 @@ MachineVerifierPass::run(MachineFunction &MF, if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailsVerification)) return PreservedAnalyses::all(); - unsigned FoundErrors = MachineVerifier(MFAM, Banner.c_str()).verify(MF); + unsigned FoundErrors = + MachineVerifier(MFAM, Banner.c_str(), &errs()).verify(MF); if (FoundErrors) report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); return PreservedAnalyses::all(); @@ -374,25 +379,28 @@ void llvm::verifyMachineFunction(const std::string &Banner, // LiveIntervals *LiveInts; // LiveStacks *LiveStks; // SlotIndexes *Indexes; - unsigned FoundErrors = MachineVerifier(nullptr, Banner.c_str()).verify(MF); + unsigned FoundErrors = + MachineVerifier(nullptr, Banner.c_str(), &errs()).verify(MF); if (FoundErrors) report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); } -bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors) - const { +bool MachineFunction::verify(Pass *p, const char *Banner, raw_ostream *OS, + bool AbortOnErrors) const { MachineFunction &MF = const_cast(*this); - unsigned FoundErrors = MachineVerifier(p, Banner).verify(MF); + unsigned FoundErrors = MachineVerifier(p, Banner, OS).verify(MF); if (AbortOnErrors && FoundErrors) report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors."); return FoundErrors == 0; } bool MachineFunction::verify(LiveIntervals *LiveInts, SlotIndexes *Indexes, - const char *Banner, bool AbortOnErrors) const { + const char *Banner, raw_ostream *OS, + bool AbortOnErrors) const { MachineFunction &MF = const_cast(*this); unsigned FoundErrors = - MachineVerifier(Banner, nullptr, LiveInts, nullptr, Indexes).verify(MF); + MachineVerifier(Banner, nullptr, LiveInts, nullptr, Indexes, OS) + .verify(MF); if (AbortOnErrors && FoundErrors) report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); return FoundErrors == 0; @@ -482,7 +490,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { for (const MachineInstr &MI : MBB.instrs()) { if (MI.getParent() != &MBB) { report("Bad instruction parent pointer", &MBB); - errs() << "Instruction: " << MI; + OS << "Instruction: " << MI; continue; } @@ -540,46 +548,48 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - errs() << '\n'; + OS << '\n'; if (!foundErrors++) { if (Banner) - errs() << "# " << Banner << '\n'; + OS << "# " << Banner << '\n'; + if (LiveInts != nullptr) - LiveInts->print(errs()); + LiveInts->print(OS); else - MF->print(errs(), Indexes); + MF->print(OS, Indexes); } - errs() << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getName() << "\n"; + + OS << "*** Bad machine code: " << msg << " ***\n" + << 
"- function: " << MF->getName() << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - errs() << "- basic block: " << printMBBReference(*MBB) << ' ' - << MBB->getName() << " (" << (const void *)MBB << ')'; + OS << "- basic block: " << printMBBReference(*MBB) << ' ' << MBB->getName() + << " (" << (const void *)MBB << ')'; if (Indexes) - errs() << " [" << Indexes->getMBBStartIdx(MBB) - << ';' << Indexes->getMBBEndIdx(MBB) << ')'; - errs() << '\n'; + OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' + << Indexes->getMBBEndIdx(MBB) << ')'; + OS << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); - errs() << "- instruction: "; + OS << "- instruction: "; if (Indexes && Indexes->hasIndex(*MI)) - errs() << Indexes->getInstructionIndex(*MI) << '\t'; - MI->print(errs(), /*IsStandalone=*/true); + OS << Indexes->getInstructionIndex(*MI) << '\t'; + MI->print(OS, /*IsStandalone=*/true); } void MachineVerifier::report(const char *msg, const MachineOperand *MO, unsigned MONum, LLT MOVRegType) { assert(MO); report(msg, MO->getParent()); - errs() << "- operand " << MONum << ": "; - MO->print(errs(), MOVRegType, TRI); - errs() << "\n"; + OS << "- operand " << MONum << ": "; + MO->print(OS, MOVRegType, TRI); + OS << '\n'; } void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) { @@ -587,11 +597,11 @@ void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) { } void MachineVerifier::report_context(SlotIndex Pos) const { - errs() << "- at: " << Pos << '\n'; + OS << "- at: " << Pos << '\n'; } void MachineVerifier::report_context(const LiveInterval &LI) const { - errs() << "- interval: " << LI << '\n'; + OS << "- interval: " << LI << '\n'; } void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit, @@ -603,35 +613,35 @@ void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit, } void MachineVerifier::report_context(const LiveRange::Segment &S) const { - errs() << "- segment: " << S << '\n'; + OS << "- segment: " << S << '\n'; } void MachineVerifier::report_context(const VNInfo &VNI) const { - errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; + OS << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; } void MachineVerifier::report_context_liverange(const LiveRange &LR) const { - errs() << "- liverange: " << LR << '\n'; + OS << "- liverange: " << LR << '\n'; } void MachineVerifier::report_context(MCPhysReg PReg) const { - errs() << "- p. register: " << printReg(PReg, TRI) << '\n'; + OS << "- p. register: " << printReg(PReg, TRI) << '\n'; } void MachineVerifier::report_context_vreg(Register VReg) const { - errs() << "- v. register: " << printReg(VReg, TRI) << '\n'; + OS << "- v. 
register: " << printReg(VReg, TRI) << '\n'; } void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const { if (VRegOrUnit.isVirtual()) { report_context_vreg(VRegOrUnit); } else { - errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; + OS << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; } } void MachineVerifier::report_context_lanemask(LaneBitmask LaneMask) const { - errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; + OS << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -710,8 +720,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[succ].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the predecessor list of the successor " - << printMBBReference(*succ) << ".\n"; + OS << "MBB is not in the predecessor list of the successor " + << printMBBReference(*succ) << ".\n"; } } @@ -721,8 +731,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[Pred].Succs.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the successor list of the predecessor " - << printMBBReference(*Pred) << ".\n"; + OS << "MBB is not in the successor list of the predecessor " + << printMBBReference(*Pred) << ".\n"; } } @@ -880,7 +890,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { SlotIndex idx = Indexes->getInstructionIndex(*MI); if (!(idx > lastIndex)) { report("Instruction index out of order", MI); - errs() << "Last instruction was at " << lastIndex << '\n'; + OS << "Last instruction was at " << lastIndex << '\n'; } lastIndex = idx; } @@ -894,7 +904,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { // precede non-terminators. if (FirstTerminator->getOpcode() != TargetOpcode::G_INVOKE_REGION_START) { report("Non-terminator instruction after the first terminator", MI); - errs() << "First terminator was:\t" << *FirstTerminator; + OS << "First terminator was:\t" << *FirstTerminator; } } } @@ -2185,8 +2195,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - errs() << MCID.getNumOperands() << " operands expected, but " - << MI->getNumOperands() << " given.\n"; + OS << MCID.getNumOperands() << " operands expected, but " + << MI->getNumOperands() << " given.\n"; } if (MI->getFlag(MachineInstr::NoConvergent) && !MCID.isConvergent()) @@ -2278,7 +2288,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // If both types are valid, check that the types are the same. 
if (SrcTy != DstTy) { report("Copy Instruction is illegal with mismatching types", MI); - errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n"; + OS << "Def = " << DstTy << ", Src = " << SrcTy << '\n'; } break; @@ -2322,8 +2332,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (SrcSize.isNonZero() && DstSize.isNonZero() && SrcSize != DstSize) { if (!DstOp.getSubReg() && !SrcOp.getSubReg()) { report("Copy Instruction is illegal with mismatching sizes", MI); - errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize - << "\n"; + OS << "Def Size = " << DstSize << ", Src Size = " << SrcSize << '\n'; } } break; @@ -2554,8 +2563,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - errs() << printReg(Reg, TRI) << " is not a " - << TRI->getRegClassName(DRC) << " register.\n"; + OS << printReg(Reg, TRI) << " is not a " + << TRI->getRegClassName(DRC) << " register.\n"; } } } @@ -2618,9 +2627,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { RBI->getMaximumSize(RegBank->getID()) < Ty.getSizeInBits()) { report("Register bank is too small for virtual register", MO, MONum); - errs() << "Register bank " << RegBank->getName() << " too small(" - << RBI->getMaximumSize(RegBank->getID()) << ") to fit " - << Ty.getSizeInBits() << "-bits\n"; + OS << "Register bank " << RegBank->getName() << " too small(" + << RBI->getMaximumSize(RegBank->getID()) << ") to fit " + << Ty.getSizeInBits() << "-bits\n"; return; } } @@ -2639,10 +2648,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { report("Virtual register does not match instruction constraint", MO, MONum); - errs() << "Expect register class " - << TRI->getRegClassName( - TII->getRegClass(MCID, MONum, TRI, *MF)) - << " but got nothing\n"; + OS << "Expect register class " + << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI, *MF)) + << " but got nothing\n"; return; } @@ -2653,14 +2661,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); - errs() << "Register class " << TRI->getRegClassName(RC) - << " does not support subreg index " << SubIdx << "\n"; + OS << "Register class " << TRI->getRegClassName(RC) + << " does not support subreg index " << SubIdx << '\n'; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - errs() << "Register class " << TRI->getRegClassName(RC) - << " does not fully support subreg index " << SubIdx << "\n"; + OS << "Register class " << TRI->getRegClassName(RC) + << " does not fully support subreg index " << SubIdx << '\n'; return; } } @@ -2682,9 +2690,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - errs() << "Expected a " << TRI->getRegClassName(DRC) - << " register, but got a " << TRI->getRegClassName(RC) - << " register\n"; + OS << "Expected a " << TRI->getRegClassName(DRC) + << " register, but got a " << TRI->getRegClassName(RC) + << " register\n"; } } } @@ -2733,11 +2741,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (loads && 
!LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); - errs() << "Live stack: " << LI << '\n'; + OS << "Live stack: " << LI << '\n'; } if (stores && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); - errs() << "Live stack: " << LI << '\n'; + OS << "Live stack: " << LI << '\n'; } } break; @@ -3050,8 +3058,8 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { SlotIndex stop = Indexes->getMBBEndIdx(MBB); if (!(stop > lastIndex)) { report("Block ends before last instruction index", MBB); - errs() << "Block ends at " << stop - << " last instruction was at " << lastIndex << '\n'; + OS << "Block ends at " << stop << " last instruction was at " << lastIndex + << '\n'; } lastIndex = stop; } @@ -3296,8 +3304,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { for (MachineBasicBlock *Pred : MBB.predecessors()) { if (!seen.count(Pred)) { report("Missing PHI operand", &Phi); - errs() << printMBBReference(*Pred) - << " is a predecessor according to the CFG.\n"; + OS << printMBBReference(*Pred) + << " is a predecessor according to the CFG.\n"; } } } @@ -3306,9 +3314,10 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { static void verifyConvergenceControl(const MachineFunction &MF, MachineDominatorTree &DT, - std::function FailureCB) { + std::function FailureCB, + raw_ostream &OS) { MachineConvergenceVerifier CV; - CV.initialize(&errs(), FailureCB, MF); + CV.initialize(&OS, FailureCB, MF); for (const auto &MBB : MF) { CV.visit(MBB); @@ -3326,7 +3335,7 @@ void MachineVerifier::visitMachineFunctionAfter() { auto FailureCB = [this](const Twine &Message) { report(Message.str().c_str(), MF); }; - verifyConvergenceControl(*MF, DT, FailureCB); + verifyConvergenceControl(*MF, DT, FailureCB, OS); calcRegsPassed(); @@ -3342,8 +3351,8 @@ void MachineVerifier::visitMachineFunctionAfter() { for (Register VReg : MInfo.vregsRequired) if (MInfo.regsKilled.count(VReg)) { report("Virtual register killed in block, but needed live out.", &MBB); - errs() << "Virtual register " << printReg(VReg) - << " is used after the block.\n"; + OS << "Virtual register " << printReg(VReg) + << " is used after the block.\n"; } } @@ -3379,9 +3388,8 @@ void MachineVerifier::visitMachineFunctionAfter() { if (!PInfo.regsLiveOut.count(LiveInReg)) { report("Live in register not found to be live out from predecessor.", &MBB); - errs() << TRI->getName(LiveInReg) - << " not found to be live out from " - << printMBBReference(*Pred) << "\n"; + OS << TRI->getName(LiveInReg) << " not found to be live out from " + << printMBBReference(*Pred) << '\n'; } } } @@ -3418,14 +3426,14 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); - errs() << "Virtual register " << printReg(Reg) - << " must be live through the block.\n"; + OS << "Virtual register " << printReg(Reg) + << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); - errs() << "Virtual register " << printReg(Reg) - << " is not needed live through the block.\n"; + OS << "Virtual register " << printReg(Reg) + << " is not needed live through the block.\n"; } } } @@ -3443,7 +3451,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!LiveInts->hasInterval(Reg)) { report("Missing live interval for virtual 
register", MF); - errs() << printReg(Reg, TRI) << " still has defs or uses\n"; + OS << printReg(Reg, TRI) << " still has defs or uses\n"; continue; } @@ -3755,9 +3763,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, report("Register not marked live out of predecessor", Pred); report_context(LR, Reg, LaneMask); report_context(*VNI); - errs() << " live into " << printMBBReference(*MFI) << '@' - << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " - << PEnd << '\n'; + OS << " live into " << printMBBReference(*MFI) << '@' + << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " << PEnd + << '\n'; continue; } @@ -3765,10 +3773,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", Pred); report_context(LR, Reg, LaneMask); - errs() << "Valno #" << PVNI->id << " live out of " - << printMBBReference(*Pred) << '@' << PEnd << "\nValno #" - << VNI->id << " live into " << printMBBReference(*MFI) << '@' - << LiveInts->getMBBStartIdx(&*MFI) << '\n'; + OS << "Valno #" << PVNI->id << " live out of " + << printMBBReference(*Pred) << '@' << PEnd << "\nValno #" << VNI->id + << " live into " << printMBBReference(*MFI) << '@' + << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } if (&*MFI == EndMBB) @@ -3823,11 +3831,11 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { report("Multiple connected components in live interval", MF); report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { - errs() << comp << ": valnos"; + OS << comp << ": valnos"; for (const VNInfo *I : LI.valnos) if (comp == ConEQ.getEqClass(I)) - errs() << ' ' << I->id; - errs() << '\n'; + OS << ' ' << I->id; + OS << '\n'; } } } @@ -3889,9 +3897,9 @@ void MachineVerifier::verifyStackFrame() { report("Call frame size on entry does not match value computed from " "predecessor", MBB); - errs() << "Call frame size on entry " << MBB->getCallFrameSize() - << " does not match value computed from predecessor " - << -BBState.EntryValue << '\n'; + OS << "Call frame size on entry " << MBB->getCallFrameSize() + << " does not match value computed from predecessor " + << -BBState.EntryValue << '\n'; } // Update stack state by checking contents of MBB. 
@@ -3914,8 +3922,8 @@ void MachineVerifier::verifyStackFrame() { BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { report("FrameDestroy is after FrameSetup ", &I); - errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" - << AbsSPAdj << ">.\n"; + OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; } if (!MRI->isSSA() && !MF->getFrameInfo().adjustsStack()) report("AdjustsStack not set in presence of a frame pseudo " @@ -3933,11 +3941,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[Pred->getNumber()].ExitValue != BBState.EntryValue || SPState[Pred->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - errs() << "Predecessor " << printMBBReference(*Pred) - << " has exit state (" << SPState[Pred->getNumber()].ExitValue - << ", " << SPState[Pred->getNumber()].ExitIsSetup << "), while " - << printMBBReference(*MBB) << " has entry state (" - << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + OS << "Predecessor " << printMBBReference(*Pred) << " has exit state (" + << SPState[Pred->getNumber()].ExitValue << ", " + << SPState[Pred->getNumber()].ExitIsSetup << "), while " + << printMBBReference(*MBB) << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; } } @@ -3948,11 +3956,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[Succ->getNumber()].EntryValue != BBState.ExitValue || SPState[Succ->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - errs() << "Successor " << printMBBReference(*Succ) - << " has entry state (" << SPState[Succ->getNumber()].EntryValue - << ", " << SPState[Succ->getNumber()].EntryIsSetup << "), while " - << printMBBReference(*MBB) << " has exit state (" - << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + OS << "Successor " << printMBBReference(*Succ) << " has entry state (" + << SPState[Succ->getNumber()].EntryValue << ", " + << SPState[Succ->getNumber()].EntryIsSetup << "), while " + << printMBBReference(*MBB) << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; } } diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 5001b4fec58f2..1ad70c86d68e3 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1054,7 +1054,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } if (VerifyEnabled) - MF->verify(this, "After splitting live range around region"); + MF->verify(this, "After splitting live range around region", &errs()); } MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg, @@ -1323,7 +1323,7 @@ unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg, } if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); + MF->verify(this, "After splitting live range around basic blocks", &errs()); return 0; } @@ -2507,7 +2507,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS); if (VerifyEnabled) - MF->verify(this, "After spilling"); + MF->verify(this, "After spilling", &errs()); } // The live virtual register requesting allocation was spilled, so tell @@ -2711,7 +2711,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); if (VerifyEnabled) - MF->verify(this, "Before greedy register allocator"); + 
MF->verify(this, "Before greedy register allocator", &errs()); RegAllocBase::init(getAnalysis(), getAnalysis().getLIS(), @@ -2770,7 +2770,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { tryHintsRecoloring(); if (VerifyEnabled) - MF->verify(this, "Before post optimization"); + MF->verify(this, "Before post optimization", &errs()); postOptimization(); reportStats(); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 99125200c1a4f..2e1f498c090d1 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -4239,7 +4239,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { JoinSplitEdges = EnableJoinSplits; if (VerifyCoalescing) - MF->verify(this, "Before register coalescing"); + MF->verify(this, "Before register coalescing", &errs()); DbgVRegToValues.clear(); buildVRegToDbgValueMap(fn); @@ -4299,7 +4299,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { LLVM_DEBUG(dump()); if (VerifyCoalescing) - MF->verify(this, "After register coalescing"); + MF->verify(this, "After register coalescing", &errs()); return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3c087727a8012..04eb891f719d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2205,7 +2205,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { - SDValue Tmp = ExpandLibCall(LC, Node, false).first; + bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; + SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp index bca3125368353..947db9cbcd92d 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp @@ -933,7 +933,7 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable( LineSectionSize += MS->emitULEB128IntValue(StrForm); LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_directory_index); - LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_data1); + LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_udata); if (HasChecksums) { LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_MD5); @@ -952,8 +952,7 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable( // file_names (sequence of file name entries). 
for (auto File : P.FileNames) { emitLineTableString(P, File.Name, DebugStrPool, DebugLineStrPool); - MS->emitInt8(File.DirIdx); - LineSectionSize += 1; + LineSectionSize += MS->emitULEB128IntValue(File.DirIdx); if (HasChecksums) { MS->emitBinaryData( StringRef(reinterpret_cast(File.Checksum.data()), diff --git a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h index 38357c7f97314..b035c4b1d6c30 100644 --- a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h +++ b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h @@ -215,7 +215,7 @@ class DebugLineSectionEmitter { encodeULEB128(FileNameForm, Section.OS); encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS); - encodeULEB128(dwarf::DW_FORM_data1, Section.OS); + encodeULEB128(dwarf::DW_FORM_udata, Section.OS); if (HasChecksums) { encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS); @@ -242,7 +242,7 @@ class DebugLineSectionEmitter { // A null-terminated string containing the full or relative path name of a // source file. Section.emitString(FileNameForm, *FileNameStr); - Section.emitIntVal(File.DirIdx, 1); + encodeULEB128(File.DirIdx, Section.OS); if (HasChecksums) { assert((File.Checksum.size() == 16) && diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp index 72e0de5937607..e1fe6c8d89ab0 100644 --- a/llvm/lib/FuzzMutate/IRMutator.cpp +++ b/llvm/lib/FuzzMutate/IRMutator.cpp @@ -623,9 +623,11 @@ void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { auto getAliveChildren = [&AliveInstsLookup](Instruction *I) { SmallSetVector Children; for (Value *U : I->users()) { - Instruction *P = dyn_cast(U); - if (P && AliveInstsLookup.count(P)) - Children.insert(AliveInstsLookup[P]); + if (Instruction *P = dyn_cast(U)) { + auto It = AliveInstsLookup.find(P); + if (It != AliveInstsLookup.end()) + Children.insert(It->second); + } } return Children; }; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3390d651d6c69..02d1d9d9f7898 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1272,9 +1272,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, // nvvm.bitcast.{f2i,i2f,ll2d,d2ll} Expand = Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll"; - else if (Name.consume_front("rotate.")) - // nvvm.rotate.{b32,b64,right.b64} - Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; else Expand = false; @@ -2261,108 +2258,6 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) { } } -static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, - Function *F, IRBuilder<> &Builder) { - Value *Rep = nullptr; - - if (Name == "abs.i" || Name == "abs.ll") { - Value *Arg = CI->getArgOperand(0); - Value *Neg = Builder.CreateNeg(Arg, "neg"); - Value *Cmp = Builder.CreateICmpSGE( - Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); - Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); - } else if (Name.starts_with("atomic.load.add.f32.p") || - Name.starts_with("atomic.load.add.f64.p")) { - Value *Ptr = CI->getArgOperand(0); - Value *Val = CI->getArgOperand(1); - Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(), - AtomicOrdering::SequentiallyConsistent); - } else if (Name.consume_front("max.") && - (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || - Name == "ui" || Name == "ull")) { - Value *Arg0 = CI->getArgOperand(0); - Value *Arg1 = CI->getArgOperand(1); - Value *Cmp = Name.starts_with("u") - ? 
Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") - : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); - Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); - } else if (Name.consume_front("min.") && - (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || - Name == "ui" || Name == "ull")) { - Value *Arg0 = CI->getArgOperand(0); - Value *Arg1 = CI->getArgOperand(1); - Value *Cmp = Name.starts_with("u") - ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") - : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); - Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); - } else if (Name == "clz.ll") { - // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64. - Value *Arg = CI->getArgOperand(0); - Value *Ctlz = Builder.CreateCall( - Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - {Arg->getType()}), - {Arg, Builder.getFalse()}, "ctlz"); - Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); - } else if (Name == "popc.ll") { - // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an - // i64. - Value *Arg = CI->getArgOperand(0); - Value *Popc = Builder.CreateCall( - Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, - {Arg->getType()}), - Arg, "ctpop"); - Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); - } else if (Name == "h2f") { - Rep = Builder.CreateCall( - Intrinsic::getDeclaration(F->getParent(), Intrinsic::convert_from_fp16, - {Builder.getFloatTy()}), - CI->getArgOperand(0), "h2f"); - } else if (Name.consume_front("bitcast.") && - (Name == "f2i" || Name == "i2f" || Name == "ll2d" || - Name == "d2ll")) { - Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType()); - } else if (Name == "rotate.b32") { - Value *Arg = CI->getOperand(0); - Value *ShiftAmt = CI->getOperand(1); - Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl, - {Arg, Arg, ShiftAmt}); - } else if (Name == "rotate.b64") { - Type *Int64Ty = Builder.getInt64Ty(); - Value *Arg = CI->getOperand(0); - Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); - Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl, - {Arg, Arg, ZExtShiftAmt}); - } else if (Name == "rotate.right.b64") { - Type *Int64Ty = Builder.getInt64Ty(); - Value *Arg = CI->getOperand(0); - Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); - Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, - {Arg, Arg, ZExtShiftAmt}); - } else { - Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); - if (IID != Intrinsic::not_intrinsic && - !F->getReturnType()->getScalarType()->isBFloatTy()) { - rename(F); - Function *NewFn = Intrinsic::getDeclaration(F->getParent(), IID); - SmallVector Args; - for (size_t I = 0; I < NewFn->arg_size(); ++I) { - Value *Arg = CI->getArgOperand(I); - Type *OldType = Arg->getType(); - Type *NewType = NewFn->getArg(I)->getType(); - Args.push_back( - (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy()) - ? 
Builder.CreateBitCast(Arg, NewType) - : Arg); - } - Rep = Builder.CreateCall(NewFn, Args); - if (F->getReturnType()->isIntegerTy()) - Rep = Builder.CreateBitCast(Rep, F->getReturnType()); - } - } - - return Rep; -} - static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder) { LLVMContext &C = F->getContext(); @@ -4313,8 +4208,85 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (!IsX86 && Name == "stackprotectorcheck") { Rep = nullptr; + } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { + Value *Arg = CI->getArgOperand(0); + Value *Neg = Builder.CreateNeg(Arg, "neg"); + Value *Cmp = Builder.CreateICmpSGE( + Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); + Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); + } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") || + Name.starts_with("atomic.load.add.f64.p"))) { + Value *Ptr = CI->getArgOperand(0); + Value *Val = CI->getArgOperand(1); + Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(), + AtomicOrdering::SequentiallyConsistent); + } else if (IsNVVM && Name.consume_front("max.") && + (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || + Name == "ui" || Name == "ull")) { + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + Value *Cmp = Name.starts_with("u") + ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") + : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); + Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); + } else if (IsNVVM && Name.consume_front("min.") && + (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || + Name == "ui" || Name == "ull")) { + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + Value *Cmp = Name.starts_with("u") + ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") + : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); + Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); + } else if (IsNVVM && Name == "clz.ll") { + // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64. + Value *Arg = CI->getArgOperand(0); + Value *Ctlz = Builder.CreateCall( + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + {Arg->getType()}), + {Arg, Builder.getFalse()}, "ctlz"); + Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); + } else if (IsNVVM && Name == "popc.ll") { + // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an + // i64. 
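The NVVM cases being folded in here keep the existing widen-then-truncate idiom: the generic intrinsic is called at the argument's width and the result truncated back to the i32 the legacy intrinsic returned. A condensed sketch of the popc.ll shape (hypothetical helper name; Builder and F play the same roles as in UpgradeIntrinsicCall):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

// llvm.nvvm.popc.ll returns i32 while llvm.ctpop.i64 returns i64, so
// count at 64 bits and truncate the result.
static llvm::Value *emitPopcLLAsI32(llvm::IRBuilder<> &Builder,
                                    llvm::Function *F, llvm::Value *Arg) {
  llvm::Value *Popc = Builder.CreateCall(
      llvm::Intrinsic::getDeclaration(F->getParent(), llvm::Intrinsic::ctpop,
                                      {Arg->getType()}),
      Arg, "ctpop");
  return Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
}
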
+ Value *Arg = CI->getArgOperand(0); + Value *Popc = Builder.CreateCall( + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, + {Arg->getType()}), + Arg, "ctpop"); + Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); } else if (IsNVVM) { - Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder); + if (Name == "h2f") { + Rep = + Builder.CreateCall(Intrinsic::getDeclaration( + F->getParent(), Intrinsic::convert_from_fp16, + {Builder.getFloatTy()}), + CI->getArgOperand(0), "h2f"); + } else if (Name.consume_front("bitcast.") && + (Name == "f2i" || Name == "i2f" || Name == "ll2d" || + Name == "d2ll")) { + Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType()); + } else { + Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); + if (IID != Intrinsic::not_intrinsic && + !F->getReturnType()->getScalarType()->isBFloatTy()) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), IID); + SmallVector Args; + for (size_t I = 0; I < NewFn->arg_size(); ++I) { + Value *Arg = CI->getArgOperand(I); + Type *OldType = Arg->getType(); + Type *NewType = NewFn->getArg(I)->getType(); + Args.push_back((OldType->isIntegerTy() && + NewType->getScalarType()->isBFloatTy()) + ? Builder.CreateBitCast(Arg, NewType) + : Arg); + } + Rep = Builder.CreateCall(NewFn, Args); + if (F->getReturnType()->isIntegerTy()) + Rep = Builder.CreateBitCast(Rep, F->getReturnType()); + } + } } else if (IsX86) { Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder); } else if (IsARM) { diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 91e0e0cc65f36..e5756940dd5a0 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMCore BuiltinGCs.cpp Comdat.cpp ConstantFold.cpp + ConstantFPRange.cpp ConstantRange.cpp ConstantRangeList.cpp Constants.cpp diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp new file mode 100644 index 0000000000000..957701891c8f3 --- /dev/null +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -0,0 +1,241 @@ +//===- ConstantFPRange.cpp - ConstantFPRange implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/ConstantFPRange.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +void ConstantFPRange::makeEmpty() { + auto &Sem = Lower.getSemantics(); + Lower = APFloat::getInf(Sem, /*Negative=*/false); + Upper = APFloat::getInf(Sem, /*Negative=*/true); + MayBeQNaN = false; + MayBeSNaN = false; +} + +void ConstantFPRange::makeFull() { + auto &Sem = Lower.getSemantics(); + Lower = APFloat::getInf(Sem, /*Negative=*/true); + Upper = APFloat::getInf(Sem, /*Negative=*/false); + MayBeQNaN = true; + MayBeSNaN = true; +} + +bool ConstantFPRange::isNaNOnly() const { + return Lower.isPosInfinity() && Upper.isNegInfinity(); +} + +ConstantFPRange::ConstantFPRange(const fltSemantics &Sem, bool IsFullSet) + : Lower(Sem, APFloat::uninitialized), Upper(Sem, APFloat::uninitialized) { + Lower = APFloat::getInf(Sem, /*Negative=*/IsFullSet); + Upper = APFloat::getInf(Sem, /*Negative=*/!IsFullSet); + MayBeQNaN = IsFullSet; + MayBeSNaN = IsFullSet; +} + +ConstantFPRange::ConstantFPRange(const APFloat &Value) + : Lower(Value.getSemantics(), APFloat::uninitialized), + Upper(Value.getSemantics(), APFloat::uninitialized) { + if (Value.isNaN()) { + makeEmpty(); + bool IsSNaN = Value.isSignaling(); + MayBeQNaN = !IsSNaN; + MayBeSNaN = IsSNaN; + } else { + Lower = Upper = Value; + MayBeQNaN = MayBeSNaN = false; + } +} + +// We treat that -0 is less than 0 here. +static APFloat::cmpResult strictCompare(const APFloat &LHS, + const APFloat &RHS) { + assert(!LHS.isNaN() && !RHS.isNaN() && "Unordered compare"); + if (LHS.isZero() && RHS.isZero()) { + if (LHS.isNegative() == RHS.isNegative()) + return APFloat::cmpEqual; + return LHS.isNegative() ? 
APFloat::cmpLessThan : APFloat::cmpGreaterThan; + } + return LHS.compare(RHS); +} + +static bool isNonCanonicalEmptySet(const APFloat &Lower, const APFloat &Upper) { + return strictCompare(Lower, Upper) == APFloat::cmpGreaterThan && + !(Lower.isInfinity() && Upper.isInfinity()); +} + +static void canonicalizeRange(APFloat &Lower, APFloat &Upper) { + if (isNonCanonicalEmptySet(Lower, Upper)) { + Lower = APFloat::getInf(Lower.getSemantics(), /*Negative=*/false); + Upper = APFloat::getInf(Upper.getSemantics(), /*Negative=*/true); + } +} + +ConstantFPRange::ConstantFPRange(APFloat LowerVal, APFloat UpperVal, + bool MayBeQNaN, bool MayBeSNaN) + : Lower(std::move(LowerVal)), Upper(std::move(UpperVal)) { + assert(&Lower.getSemantics() == &Upper.getSemantics() && + "Should only use the same semantics"); + assert(!isNonCanonicalEmptySet(Lower, Upper) && "Non-canonical form"); + this->MayBeQNaN = MayBeQNaN; + this->MayBeSNaN = MayBeSNaN; +} + +ConstantFPRange ConstantFPRange::getFinite(const fltSemantics &Sem) { + return ConstantFPRange(APFloat::getLargest(Sem, /*Negative=*/true), + APFloat::getLargest(Sem, /*Negative=*/false), + /*MayBeQNaN=*/false, /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::getNaNOnly(const fltSemantics &Sem, + bool MayBeQNaN, bool MayBeSNaN) { + return ConstantFPRange(APFloat::getInf(Sem, /*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/true), MayBeQNaN, + MayBeSNaN); +} + +ConstantFPRange +ConstantFPRange::makeAllowedFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) { + // TODO + return getFull(Other.getSemantics()); +} + +ConstantFPRange +ConstantFPRange::makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) { + // TODO + return getEmpty(Other.getSemantics()); +} + +ConstantFPRange ConstantFPRange::makeExactFCmpRegion(FCmpInst::Predicate Pred, + const APFloat &Other) { + return makeAllowedFCmpRegion(Pred, ConstantFPRange(Other)); +} + +bool ConstantFPRange::fcmp(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) const { + return makeSatisfyingFCmpRegion(Pred, Other).contains(*this); +} + +bool ConstantFPRange::isFullSet() const { + return Lower.isNegInfinity() && Upper.isPosInfinity() && MayBeQNaN && + MayBeSNaN; +} + +bool ConstantFPRange::isEmptySet() const { + return Lower.isPosInfinity() && Upper.isNegInfinity() && !MayBeQNaN && + !MayBeSNaN; +} + +bool ConstantFPRange::contains(const APFloat &Val) const { + assert(&getSemantics() == &Val.getSemantics() && + "Should only use the same semantics"); + + if (Val.isNaN()) + return Val.isSignaling() ? MayBeSNaN : MayBeQNaN; + return strictCompare(Lower, Val) != APFloat::cmpGreaterThan && + strictCompare(Val, Upper) != APFloat::cmpGreaterThan; +} + +bool ConstantFPRange::contains(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + + if (CR.MayBeQNaN && !MayBeQNaN) + return false; + + if (CR.MayBeSNaN && !MayBeSNaN) + return false; + + return strictCompare(Lower, CR.Lower) != APFloat::cmpGreaterThan && + strictCompare(CR.Upper, Upper) != APFloat::cmpGreaterThan; +} + +const APFloat *ConstantFPRange::getSingleElement() const { + if (MayBeSNaN || MayBeQNaN) + return nullptr; + return Lower.bitwiseIsEqual(Upper) ? 
&Lower : nullptr; +} + +std::optional ConstantFPRange::getSignBit() const { + if (!MayBeSNaN && !MayBeQNaN && Lower.isNegative() == Upper.isNegative()) + return Lower.isNegative(); + return std::nullopt; +} + +bool ConstantFPRange::operator==(const ConstantFPRange &CR) const { + if (MayBeSNaN != CR.MayBeSNaN || MayBeQNaN != CR.MayBeQNaN) + return false; + return Lower.bitwiseIsEqual(CR.Lower) && Upper.bitwiseIsEqual(CR.Upper); +} + +FPClassTest ConstantFPRange::classify() const { + uint32_t Mask = fcNone; + if (MayBeSNaN) + Mask |= fcSNan; + if (MayBeQNaN) + Mask |= fcQNan; + if (!isNaNOnly()) { + FPClassTest LowerMask = Lower.classify(); + FPClassTest UpperMask = Upper.classify(); + assert(LowerMask <= UpperMask && "Range is nan-only."); + for (uint32_t I = LowerMask; I <= UpperMask; I <<= 1) + Mask |= I; + } + return static_cast(Mask); +} + +void ConstantFPRange::print(raw_ostream &OS) const { + if (isFullSet()) + OS << "full-set"; + else if (isEmptySet()) + OS << "empty-set"; + else { + bool NaNOnly = isNaNOnly(); + if (!NaNOnly) + OS << '[' << Lower << ", " << Upper << ']'; + + if (MayBeSNaN || MayBeQNaN) { + if (!NaNOnly) + OS << " with "; + if (MayBeSNaN && MayBeQNaN) + OS << "NaN"; + else if (MayBeSNaN) + OS << "SNaN"; + else if (MayBeQNaN) + OS << "QNaN"; + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ConstantFPRange::dump() const { print(dbgs()); } +#endif + +ConstantFPRange +ConstantFPRange::intersectWith(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + APFloat NewLower = maxnum(Lower, CR.Lower); + APFloat NewUpper = minnum(Upper, CR.Upper); + canonicalizeRange(NewLower, NewUpper); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), + MayBeQNaN & CR.MayBeQNaN, MayBeSNaN & CR.MayBeSNaN); +} + +ConstantFPRange ConstantFPRange::unionWith(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + return ConstantFPRange(minnum(Lower, CR.Lower), maxnum(Upper, CR.Upper), + MayBeQNaN | CR.MayBeQNaN, MayBeSNaN | CR.MayBeSNaN); +} diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 22e60772def43..e078527b597b4 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -330,6 +330,10 @@ void LLVMContext::getSyncScopeNames(SmallVectorImpl &SSNs) const { pImpl->getSyncScopeNames(SSNs); } +std::optional LLVMContext::getSyncScopeName(SyncScope::ID Id) const { + return pImpl->getSyncScopeName(Id); +} + void LLVMContext::setGC(const Function &Fn, std::string GCName) { pImpl->GCNames[&Fn] = std::move(GCName); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 4f1ef8cec3213..f2c965a45df3a 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -244,6 +244,16 @@ void LLVMContextImpl::getSyncScopeNames( SSNs[SSE.second] = SSE.first(); } +std::optional +LLVMContextImpl::getSyncScopeName(SyncScope::ID Id) const { + for (const auto &SSE : SSC) { + if (SSE.second != Id) + continue; + return SSE.first(); + } + return std::nullopt; +} + /// Gets the OptPassGate for this LLVMContextImpl, which defaults to the /// singleton OptBisect if not explicitly set. 
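A short usage sketch for the new ConstantFPRange, using only APIs defined in this patch (semantics must match across operands, per the asserts above):

#include "llvm/ADT/APFloat.h"
#include "llvm/IR/ConstantFPRange.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;

static void constantFPRangeDemo() {
  const fltSemantics &Sem = APFloat::IEEEsingle();
  ConstantFPRange One{APFloat(1.0f)};                       // [1, 1], no NaN
  ConstantFPRange Finite = ConstantFPRange::getFinite(Sem); // no inf, no NaN
  assert(Finite.contains(APFloat(1.0f)));
  ConstantFPRange Hull = One.unionWith(Finite); // still the finite range
  Hull.print(errs());
}

Note the representation is a single interval plus two NaN flags, so unionWith returns the convex hull and may over-approximate a true set union.
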
OptPassGate &LLVMContextImpl::getOptPassGate() const { diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index e76f004b590ef..971091f304061 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1665,6 +1665,10 @@ class LLVMContextImpl { /// scope names are ordered by increasing synchronization scope IDs. void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// getSyncScopeName - Returns the name of a SyncScope::ID + /// registered with LLVMContext, if any. + std::optional getSyncScopeName(SyncScope::ID Id) const; + /// Maintain the GC name for each function. /// /// This saves allocating an additional word in Function for programs which diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 992ce34e00034..4d2fbdde3f9f0 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -216,8 +216,7 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { if (ProfDataName->getString() == "branch_weights") { unsigned Offset = getBranchWeightOffset(ProfileData); for (unsigned Idx = Offset; Idx < ProfileData->getNumOperands(); ++Idx) { - auto *V = mdconst::dyn_extract(ProfileData->getOperand(Idx)); - assert(V && "Malformed branch_weight in MD_prof node"); + auto *V = mdconst::extract(ProfileData->getOperand(Idx)); TotalVal += V->getValue().getZExtValue(); } return true; diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp index 82030207eee61..e45bd00f1a292 100644 --- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -322,12 +322,11 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { uint64_t ResourceMask = llvm::bit_floor(ReadyMask); - auto it = AvailableUnits.find(ResourceMask); - if (it == AvailableUnits.end()) { + auto [it, Inserted] = AvailableUnits.try_emplace(ResourceMask); + if (Inserted) { unsigned Index = getResourceStateIndex(ResourceMask); unsigned NumUnits = llvm::popcount(Resources[Index]->getReadyMask()); - it = - AvailableUnits.insert(std::make_pair(ResourceMask, NumUnits)).first; + it->second = NumUnits; } if (!it->second) { diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 03474be0c7b80..b2e6f6285fea5 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -11,5 +11,6 @@ add_llvm_component_library(LLVMSandboxIR LINK_COMPONENTS Core Support + Analysis ) diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 7f68c5ab9b7cf..dee917fd56104 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -5366,11 +5366,14 @@ APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { void APFloat::print(raw_ostream &OS) const { SmallVector Buffer; toString(Buffer); - OS << Buffer << "\n"; + OS << Buffer; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void APFloat::dump() const { + print(dbgs()); + dbgs() << '\n'; +} #endif void APFloat::Profile(FoldingSetNodeID &NID) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 69806c9c3fdbf..dfb6b08b1f73b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5640,6 +5640,34 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::SQDMULH_VG4_4Z4Z_S, 
AArch64::SQDMULH_VG4_4Z4Z_D})) SelectDestructiveMultiIntrinsic(Node, 4, true, Op); return; + case Intrinsic::aarch64_sme_fp8_scale_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S, + AArch64::FSCALE_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sme_fp8_scale_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S, + AArch64::FSCALE_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sme_fp8_scale_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S, + AArch64::FSCALE_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sme_fp8_scale_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S, + AArch64::FSCALE_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; case Intrinsic::aarch64_sve_whilege_x2: if (auto Op = SelectOpcodeFromVT( Node->getValueType(0), diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ac05a44abc2dd..7a07bb67e77de 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3440,15 +3440,14 @@ InstructionCost AArch64TTIImpl::getAddressComputationCost(Type *Ty, return 1; } -InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost AArch64TTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -3527,7 +3526,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // The base case handles scalable vectors fine for now, since it treats the // cost as 1 * legalization cost. 
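Context for the getCmpSelInstrCost change in this hunk: the signature now threads TTI::OperandValueInfo for both operands, so callers can say when an operand is, for example, a uniform constant. A hedged caller sketch, assuming the public TargetTransformInfo wrapper forwards the new parameters the way these Impl overrides do:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Cost of an integer equality compare where operand 2 is a uniform constant.
static InstructionCost costICmpEqConst(const TargetTransformInfo &TTI,
                                       Type *ValTy, Type *CondTy) {
  return TTI.getCmpSelInstrCost(
      Instruction::ICmp, ValTy, CondTy, CmpInst::ICMP_EQ,
      TargetTransformInfo::TCK_RecipThroughput,
      /*Op1Info=*/{TargetTransformInfo::OK_AnyValue,
                   TargetTransformInfo::OP_None},
      /*Op2Info=*/{TargetTransformInfo::OK_UniformConstantValue,
                   TargetTransformInfo::OP_None});
}
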
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 22bba21eedcc5..28e45207596ec 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -208,10 +208,12 @@ class AArch64TTIImpl : public BasicTTIImplBase { InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 2ebb3798b03ab..271c8d45fd4a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -289,6 +289,7 @@ static const LLT F64 = LLT::float64(); static const LLT S96 = LLT::scalar(96); static const LLT S128 = LLT::scalar(128); static const LLT S160 = LLT::scalar(160); +static const LLT S192 = LLT::scalar(192); static const LLT S224 = LLT::scalar(224); static const LLT S256 = LLT::scalar(256); static const LLT S512 = LLT::scalar(512); @@ -334,7 +335,7 @@ static const LLT V2S128 = LLT::fixed_vector(2, 128); static const LLT V4S128 = LLT::fixed_vector(4, 128); static std::initializer_list AllScalarTypes = { - S32, S64, S96, S128, S160, S224, S256, S512, S1024}; + S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024}; static std::initializer_list AllS16Vectors{ V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128}; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a9754ba357893..885ecab891b1f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6797,8 +6797,7 @@ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op, LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS}); // Special case: for shifts, the RHS always needs a zext. 
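The AMDGPU fix that follows deserves a callout: the old condition compared against ISD::SRA twice and never matched ISD::SHL, so shift-left amounts were not zero-extended. Shift amounts are unsigned counts, which is why they take a zext even when the value operands are sign-extended. The corrected predicate, as a standalone sketch:

#include "llvm/CodeGen/ISDOpcodes.h"

// True for the three shift opcodes whose RHS (the shift amount) must be
// zero-extended during uniform i32 promotion.
static bool shiftNeedsZExtRHS(unsigned Opc) {
  return Opc == llvm::ISD::SHL || Opc == llvm::ISD::SRL ||
         Opc == llvm::ISD::SRA;
}
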
- if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL || - Op.getOpcode() == ISD::SRA) + if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS}); else RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS}); @@ -9365,6 +9364,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Opc = AMDGPU::S_GET_BARRIER_STATE_IMM; SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32); Ops.push_back(K); + Ops.push_back(Chain); } else { Opc = AMDGPU::S_GET_BARRIER_STATE_M0; SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(2)); @@ -9967,7 +9967,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, 0); } Ops.push_back(copyToM0(DAG, Chain, DL, M0Val).getValue(0)); - } else if (!IsInlinableBarID) { + } else if (IsInlinableBarID) { + Ops.push_back(Chain); + } else { Ops.push_back(copyToM0(DAG, Chain, DL, BarOp).getValue(0)); } @@ -16142,11 +16144,8 @@ static bool atomicIgnoresDenormalModeOrFPModeIsFTZ(const AtomicRMWInst *RMW) { static OptimizationRemark emitAtomicRMWLegalRemark(const AtomicRMWInst *RMW) { LLVMContext &Ctx = RMW->getContext(); - SmallVector SSNs; - Ctx.getSyncScopeNames(SSNs); - StringRef MemScope = SSNs[RMW->getSyncScopeID()].empty() - ? "system" - : SSNs[RMW->getSyncScopeID()]; + StringRef SS = Ctx.getSyncScopeName(RMW->getSyncScopeID()).value_or(""); + StringRef MemScope = SS.empty() ? StringRef("system") : SS; return OptimizationRemark(DEBUG_TYPE, "Passed", RMW) << "Hardware instruction generated for atomic " diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index a566827260138..80a7529002ac9 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1752,6 +1752,14 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg()); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { if (IsVGPR) { + // Implicit VGPR defs and uses are never a part of the memory + // instructions description and usually present to account for + // super-register liveness. + // TODO: Most of the other instructions also have implicit uses + // for the liveness accounting only. + if (Op.isImplicit() && MI.mayLoadOrStore()) + continue; + // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the // previous write and this write are the same type of VMEM // instruction, in which case they are (in some architectures) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a03928b618df0..f891aece26848 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -17653,6 +17653,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, // No immediate versions of these to check for. 
break; + case Intrinsic::arm_neon_vbsl: { + SDLoc dl(N); + return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case Intrinsic::arm_mve_vqdmlah: case Intrinsic::arm_mve_vqdmlash: case Intrinsic::arm_mve_vqrdmlah: @@ -19072,6 +19077,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::VBSP: + if (N->getOperand(1) == N->getOperand(2)) + return N->getOperand(1); + return SDValue(); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (N->getConstantOperandVal(1)) { diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index fcabc9076e4d3..48dcbdb137123 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5524,26 +5524,23 @@ def : Pat<(v16i8 (vnotq QPR:$src)), // with different register constraints; it just inserts copies. // That is why pseudo VBSP implemented. Is is expanded later into // VBIT/VBIF/VBSL taking into account register constraints to avoid copies. -def VBSPd - : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - IIC_VBINiD, "", - [(set DPR:$Vd, - (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def VBSPd : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + IIC_VBINiD, "", []>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), - (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), +def : Pat<(v8i8 (NEONvbsp (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), - (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), +def : Pat<(v4i16 (NEONvbsp (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), - (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), +def : Pat<(v2i32 (NEONvbsp (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), - (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), +def : Pat<(v2f32 (NEONvbsp (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), - (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), +def : Pat<(v1i64 (NEONvbsp (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd), @@ -5560,26 +5557,23 @@ def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; } -def VBSPq - : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - IIC_VBINiQ, "", - [(set QPR:$Vd, - (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def VBSPq : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + IIC_VBINiQ, "", []>; let Predicates = [HasNEON] in { -def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), - (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), +def : Pat<(v16i8 (NEONvbsp (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), - (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), +def : Pat<(v8i16 (NEONvbsp (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), - (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), +def : Pat<(v4i32 (NEONvbsp (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : 
Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), - (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), +def : Pat<(v4f32 (NEONvbsp (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), - (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), +def : Pat<(v2i64 (NEONvbsp (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd), diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 9b5349241c341..865e2f3066ef0 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -934,11 +934,10 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1); } -InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost ARMTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // Thumb scalar code size cost for select. @@ -1052,7 +1051,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, VecValTy->getNumElements() * getCmpSelInstrCost(Opcode, ValTy->getScalarType(), VecCondTy->getScalarType(), VecPred, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } std::pair LT = getTypeLegalizationCost(ValTy); @@ -1077,8 +1076,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ST->hasMVEIntegerOps() && ValTy->isVectorTy()) BaseCost = ST->getMVEVectorCostFactor(CostKind); - return BaseCost * - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, + CostKind, Op1Info, Op2Info, I); } InstructionCost ARMTTIImpl::getAddressComputationCost(Type *Ty, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 528f082dde32c..7be53c4bcaa29 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -239,10 +239,12 @@ class ARMTTIImpl : public BasicTTIImplBase { TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index f7e17901c7ed5..62d6e25f83b59 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -826,13 +826,12 @@ let Predicates = [BPFNoALU32] in { } // Atomic Fetch-and- operations -class XFALU64 +class XFALU64 : TYPE_LD_ST { + []> { bits<4> dst; bits<20> 
addr; @@ -844,13 +843,12 @@ class XFALU64 +class XFALU32 : TYPE_LD_ST { + []> { bits<4> dst; bits<20> addr; @@ -864,26 +862,122 @@ class XFALU32; - def XFANDW32 : XFALU32; - def XFORW32 : XFALU32; - def XFXORW32 : XFALU32; + def XFADDW32 : XFALU32; + def XFANDW32 : XFALU32; + def XFORW32 : XFALU32; + def XFXORW32 : XFALU32; } let Predicates = [BPFHasALU32] in { - def XFADDD : XFALU64; + def XFADDD : XFALU64; } - def XFANDD : XFALU64; - def XFORD : XFALU64; - def XFXORD : XFALU64; + def XFANDD : XFALU64; + def XFORD : XFALU64; + def XFXORD : XFALU64; } -// atomic_load_sub can be represented as a neg followed -// by an atomic_load_add. -def : Pat<(atomic_load_sub_i32 ADDRri:$addr, GPR32:$val), - (XFADDW32 ADDRri:$addr, (NEG_32 GPR32:$val))>; -def : Pat<(atomic_load_sub_i64 ADDRri:$addr, GPR:$val), - (XFADDD ADDRri:$addr, (NEG_64 GPR:$val))>; +let Predicates = [BPFHasALU32] in { + foreach P = [// add + [atomic_load_add_i32_monotonic, XADDW32], + [atomic_load_add_i32_acquire, XFADDW32], + [atomic_load_add_i32_release, XFADDW32], + [atomic_load_add_i32_acq_rel, XFADDW32], + [atomic_load_add_i32_seq_cst, XFADDW32], + // and + [atomic_load_and_i32_monotonic, XANDW32], + [atomic_load_and_i32_acquire, XFANDW32], + [atomic_load_and_i32_release, XFANDW32], + [atomic_load_and_i32_acq_rel, XFANDW32], + [atomic_load_and_i32_seq_cst, XFANDW32], + // or + [atomic_load_or_i32_monotonic, XORW32], + [atomic_load_or_i32_acquire, XFORW32], + [atomic_load_or_i32_release, XFORW32], + [atomic_load_or_i32_acq_rel, XFORW32], + [atomic_load_or_i32_seq_cst, XFORW32], + // xor + [atomic_load_xor_i32_monotonic, XXORW32], + [atomic_load_xor_i32_acquire, XFXORW32], + [atomic_load_xor_i32_release, XFXORW32], + [atomic_load_xor_i32_acq_rel, XFXORW32], + [atomic_load_xor_i32_seq_cst, XFXORW32], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR32:$val), (P[1] ADDRri:$addr, GPR32:$val)>; + } + + // atomic_load_sub can be represented as a neg followed + // by an atomic_load_add. 
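// Illustration, not part of the patch: two properties the atomic patterns
// below rely on, sketched with std::atomic (function names are hypothetical).
#include <atomic>

// 1) BPF has no fetch-and-sub encoding, but fetch_sub(p, v) behaves exactly
//    like fetch_add(p, -v); NEG wraps in hardware, so the neg+add pair
//    preserves both the returned old value and the final memory state.
int fetchSubViaAdd(std::atomic<int> &A, int V) {
  return A.fetch_add(-V); // same result as A.fetch_sub(V)
}

// 2) The _nu/_hu PatFrags defined further below split relaxed (monotonic)
//    i64 and/or/xor by whether the fetched value is consumed: a discarded
//    result may use the cheaper non-fetching XANDD/XORD/XXORD, while a
//    consumed result needs the fetching XFANDD/XFORD/XFXORD form.
void clearBitsResultUnused(std::atomic<long> &Flags, long Mask) {
  Flags.fetch_and(~Mask, std::memory_order_relaxed); // use_empty() case
}
long clearBitsResultUsed(std::atomic<long> &Flags, long Mask) {
  return Flags.fetch_and(~Mask, std::memory_order_relaxed); // has-use case
}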
+ foreach P = [[atomic_load_sub_i32_monotonic, XADDW32], + [atomic_load_sub_i32_acquire, XFADDW32], + [atomic_load_sub_i32_release, XFADDW32], + [atomic_load_sub_i32_acq_rel, XFADDW32], + [atomic_load_sub_i32_seq_cst, XFADDW32], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR32:$val), (P[1] ADDRri:$addr, (NEG_32 GPR32:$val))>; + } + + foreach P = [// add + [atomic_load_add_i64_monotonic, XADDD], + [atomic_load_add_i64_acquire, XFADDD], + [atomic_load_add_i64_release, XFADDD], + [atomic_load_add_i64_acq_rel, XFADDD], + [atomic_load_add_i64_seq_cst, XFADDD], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, GPR:$val)>; + } +} + +foreach P = [[atomic_load_sub_i64_monotonic, XADDD], + [atomic_load_sub_i64_acquire, XFADDD], + [atomic_load_sub_i64_release, XFADDD], + [atomic_load_sub_i64_acq_rel, XFADDD], + [atomic_load_sub_i64_seq_cst, XFADDD], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, (NEG_64 GPR:$val))>; +} + +// Borrow the idea from X86InstrFragments.td +class binop_no_use + : PatFrag<(ops node:$A, node:$B), + (operator node:$A, node:$B), + [{ return SDValue(N, 0).use_empty(); }]>; + +class binop_has_use + : PatFrag<(ops node:$A, node:$B), + (operator node:$A, node:$B), + [{ return !SDValue(N, 0).use_empty(); }]>; + +foreach op = [add, and, or, xor] in { +def atomic_load_ # op # _i64_monotonic_nu: + binop_no_use ("atomic_load_"#op# _i64_monotonic)>; +def atomic_load_ # op # _i64_monotonic_hu: + binop_has_use("atomic_load_"#op# _i64_monotonic)>; +} + +foreach P = [// and + [atomic_load_and_i64_monotonic_nu, XANDD], + [atomic_load_and_i64_monotonic_hu, XFANDD], + [atomic_load_and_i64_acquire, XFANDD], + [atomic_load_and_i64_release, XFANDD], + [atomic_load_and_i64_acq_rel, XFANDD], + [atomic_load_and_i64_seq_cst, XFANDD], + // or + [atomic_load_or_i64_monotonic_nu, XORD], + [atomic_load_or_i64_monotonic_hu, XFORD], + [atomic_load_or_i64_acquire, XFORD], + [atomic_load_or_i64_release, XFORD], + [atomic_load_or_i64_acq_rel, XFORD], + [atomic_load_or_i64_seq_cst, XFORD], + // xor + [atomic_load_xor_i64_monotonic_nu, XXORD], + [atomic_load_xor_i64_monotonic_hu, XFXORD], + [atomic_load_xor_i64_acquire, XFXORD], + [atomic_load_xor_i64_release, XFXORD], + [atomic_load_xor_i64_acq_rel, XFXORD], + [atomic_load_xor_i64_seq_cst, XFXORD], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, GPR:$val)>; +} // Atomic Exchange class XCHG diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp index 24224f6c1e9e6..09635dbba1760 100644 --- a/llvm/lib/Target/BPF/BPFMIChecking.cpp +++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp @@ -118,7 +118,7 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) { RegIsGPR64 = GPR64RegClass->contains(MO.getReg()); if (!MO.isDead()) { - // It is a GPR64 live Def, we are sure it is live. */ + // It is a GPR64 live Def, we are sure it is live. if (RegIsGPR64) return true; // It is a GPR32 live Def, we are unsure whether it is really dead due to @@ -153,6 +153,10 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) { } void BPFMIPreEmitChecking::processAtomicInsts() { + if (MF->getSubtarget().getHasJmp32()) + return; + + // Only check for cpu version 1 and 2. 
for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD) diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 9d0db33d9a1fd..bf0bef3a2b2f9 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -44,15 +44,17 @@ class BPFTTIImpl : public BasicTTIImplBase { return TTI::TCC_Basic; } - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const llvm::Instruction *I = nullptr) { + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const llvm::Instruction *I = nullptr) { if (Opcode == Instruction::Select) return SCEVCheapExpansionBudget.getValue(); return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); } InstructionCost getArithmeticInstrCost( diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 4d847abea731d..9d6dee13ca97a 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -35,6 +35,15 @@ static const char *BTFKindStr[] = { #include "llvm/DebugInfo/BTF/BTF.def" }; +static const DIType *tryRemoveAtomicType(const DIType *Ty) { + if (!Ty) + return Ty; + auto DerivedTy = dyn_cast(Ty); + if (DerivedTy && DerivedTy->getTag() == dwarf::DW_TAG_atomic_type) + return DerivedTy->getBaseType(); + return Ty; +} + /// Emit a BTF common type. void BTFTypeBase::emitType(MCStreamer &OS) { OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) + @@ -90,7 +99,7 @@ void BTFTypeDerived::completeType(BTFDebug &BDebug) { return; // The base type for PTR/CONST/VOLATILE could be void. - const DIType *ResolvedType = DTy->getBaseType(); + const DIType *ResolvedType = tryRemoveAtomicType(DTy->getBaseType()); if (!ResolvedType) { assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST || Kind == BTF::BTF_KIND_VOLATILE) && @@ -305,7 +314,7 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { } else { BTFMember.Offset = DDTy->getOffsetInBits(); } - const auto *BaseTy = DDTy->getBaseType(); + const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType()); BTFMember.Type = BDebug.getTypeId(BaseTy); Members.push_back(BTFMember); } @@ -342,7 +351,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) { IsCompleted = true; DITypeRefArray Elements = STy->getTypeArray(); - auto RetType = Elements[0]; + auto RetType = tryRemoveAtomicType(Elements[0]); BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0; BTFType.NameOff = 0; @@ -350,7 +359,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) { // to represent the vararg, encode the NameOff/Type to be 0. 
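// Illustration, not part of the patch: per the BTF format, a vararg
// FUNC_PROTO ends with one extra parameter whose name and type are both
// zero. For "int printf(const char *fmt, ...)" the parameter array would
// look like this (type ids are hypothetical):
struct btf_param { unsigned name_off, type; };
static const btf_param PrintfParams[] = {
    {/*name_off of "fmt"*/ 1, /*type id of const char * */ 42},
    {0, 0}, // the "..." slot: NameOff == 0, Type == 0
};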
for (unsigned I = 1, N = Elements.size(); I < N; ++I) { struct BTF::BTFParam Param; - auto Element = Elements[I]; + auto Element = tryRemoveAtomicType(Elements[I]); if (Element) { Param.NameOff = BDebug.addString(FuncArgNames[I]); Param.Type = BDebug.getTypeId(Element); @@ -483,7 +492,7 @@ void BTFTypeTypeTag::completeType(BTFDebug &BDebug) { IsCompleted = true; BTFType.NameOff = BDebug.addString(Tag); if (DTy) { - const DIType *ResolvedType = DTy->getBaseType(); + const DIType *ResolvedType = tryRemoveAtomicType(DTy->getBaseType()); if (!ResolvedType) BTFType.Type = 0; else @@ -800,6 +809,10 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, bool CheckPointer, bool SeenPointer) { unsigned Tag = DTy->getTag(); + if (Tag == dwarf::DW_TAG_atomic_type) + return visitTypeEntry(DTy->getBaseType(), TypeId, CheckPointer, + SeenPointer); + /// Try to avoid chasing pointees, esp. structure pointees which may /// unnecessary bring in a lot of types. if (CheckPointer && !SeenPointer) { @@ -1444,8 +1457,10 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { DIGlobal = GVE->getVariable(); if (SecName.starts_with(".maps")) visitMapDefType(DIGlobal->getType(), GVTypeId); - else - visitTypeEntry(DIGlobal->getType(), GVTypeId, false, false); + else { + const DIType *Ty = tryRemoveAtomicType(DIGlobal->getType()); + visitTypeEntry(Ty, GVTypeId, false, false); + } break; } diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index dd73b895b14d3..926cbe97f24fd 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,6 +36,7 @@ using namespace llvm; static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: + case Intrinsic::atan2: case Intrinsic::exp: case Intrinsic::log: case Intrinsic::log10: @@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, MultiplicandVec); } +static Value *expandAtan2Intrinsic(CallInst *Orig) { + Value *Y = Orig->getOperand(0); + Value *X = Orig->getOperand(1); + Type *Ty = X->getType(); + IRBuilder<> Builder(Orig); + Builder.setFastMathFlags(Orig->getFastMathFlags()); + + Value *Tan = Builder.CreateFDiv(Y, X); + + CallInst *Atan = + Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan"); + Atan->setTailCall(Orig->isTailCall()); + Atan->setAttributes(Orig->getAttributes()); + + // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. + Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi); + Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2); + Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2); + Constant *Zero = ConstantFP::get(Ty, 0); + Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi); + Value *AtanSubPi = Builder.CreateFSub(Atan, Pi); + + // x > 0 -> atan. + Value *Result = Atan; + Value *XLt0 = Builder.CreateFCmpOLT(X, Zero); + Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero); + Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero); + Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero); + + // x < 0, y >= 0 -> atan + pi. + Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0); + Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result); + + // x < 0, y < 0 -> atan - pi. 
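// For reference, not part of the patch: the select cascade assembled here
// models this scalar function (name and constants are illustrative; NaN and
// fast-math corner cases are ignored, and x == y == 0 yields +pi/2 rather
// than libm's 0).
#include <cmath>

static double atan2Reference(double y, double x) {
  const double Pi = 3.14159265358979323846;
  double A = std::atan(y / x); // correct as-is for x > 0
  if (x < 0.0)
    return (y >= 0.0) ? A + Pi : A - Pi; // quadrants II and III
  if (x == 0.0)
    return (y < 0.0) ? -Pi / 2 : Pi / 2; // on the y axis
  return A; // x > 0: plain atan already matches atan2
}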
+ Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0); + Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result); + + // x == 0, y < 0 -> -pi/2 + Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0); + Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result); + + // x == 0, y > 0 -> pi/2 + Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0); + Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result); + + return Result; +} + static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); @@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::atan2: + Result = expandAtan2Intrinsic(Orig); + break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index f47fcff5d6025..bbb9d065b6243 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -255,11 +255,10 @@ InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( CostKind); } -InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost HexagonTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy()) return InstructionCost::getMax(); @@ -267,7 +266,8 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (Opcode == Instruction::FCmp) return LT.first + FloatFactor * getTypeNumElements(ValTy); } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost HexagonTTIImpl::getArithmeticInstrCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 4a1cfe03d48a7..826644d08d1ac 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -132,10 +132,12 @@ class HexagonTTIImpl : public BasicTTIImplBase { unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 38c51666139a8..9bcc911b6c345 100644 --- 
a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -563,21 +563,19 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1) << ", " << Maxntidz.value_or(1) << "\n"; - unsigned Mincta = 0; - if (getMinCTASm(F, Mincta)) - O << ".minnctapersm " << Mincta << "\n"; + if (const auto Mincta = getMinCTASm(F)) + O << ".minnctapersm " << *Mincta << "\n"; - unsigned Maxnreg = 0; - if (getMaxNReg(F, Maxnreg)) - O << ".maxnreg " << Maxnreg << "\n"; + if (const auto Maxnreg = getMaxNReg(F)) + O << ".maxnreg " << *Maxnreg << "\n"; // .maxclusterrank directive requires SM_90 or higher, make sure that we // filter it out for lower SM versions, as it causes a hard ptxas crash. const NVPTXTargetMachine &NTM = static_cast(TM); const auto *STI = static_cast(NTM.getSubtargetImpl()); - unsigned Maxclusterrank = 0; - if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90) - O << ".maxclusterrank " << Maxclusterrank << "\n"; + if (STI->getSmVersion() >= 90) + if (const auto Maxclusterrank = getMaxClusterRank(F)) + O << ".maxclusterrank " << *Maxclusterrank << "\n"; } std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8812136733fb2..2688834221091 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -594,13 +594,20 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); - setOperationAction({ISD::ROTL, ISD::ROTR}, - {MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64}, - Expand); - - if (STI.hasHWROT32()) - setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Legal); - + // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs + // that don't have h/w rotation we lower them to multi-instruction assembly. + // See ROT*_sw in NVPTXIntrInfo.td + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::v2i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::v2i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); setOperationAction(ISD::BSWAP, MVT::i16, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Custom); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f6bbf4c2ffc02..510e4b8100311 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1665,6 +1665,167 @@ def BREV64 : "brev.b64 \t$dst, $a;", [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>; +// +// Rotate: Use ptx shf instruction if available. 
+// + +// 32 bit r2 = rotl r1, n +// => +// r2 = shf.l r1, r1, n +def ROTL32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTL32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[hasHWROT32]>; + +// 32 bit r2 = rotr r1, n +// => +// r2 = shf.r r1, r1, n +def ROTR32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTR32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[hasHWROT32]>; + +// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1. +def ROT32imm_sw : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + "shl.b32 \t%lhs, $src, $amt1;\n\t" + "shr.b32 \t%rhs, $src, $amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; + +def SUB_FRM_32 : SDNodeXFormgetTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; + +// 32-bit software rotate left by register. +def ROTL32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shl.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shr.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[noHWROT32]>; + +// 32-bit software rotate right by register. +def ROTR32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shr.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shl.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[noHWROT32]>; + +// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1. +def ROT64imm_sw : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + "shl.b64 \t%lhs, $src, $amt1;\n\t" + "shr.b64 \t%rhs, $src, $amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; + +def SUB_FRM_64 : SDNodeXFormgetTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; +def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; + +// 64-bit software rotate left by register. 
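// Illustration, not part of the patch: ROTL64reg_sw below computes a rotate
// as two shifts joined by add.u64; add is interchangeable with or here
// because the two shifted fields occupy disjoint bits. A C++ model of the
// same sequence (hypothetical name):
#include <cstdint>

uint64_t rotl64Sw(uint64_t X, unsigned Amt) {
  Amt &= 63;                      // and.b32 %amt2, $amt, 63
  if (Amt == 0)
    return X;                     // C++ needs this guard; PTX clamps X >> 64 to 0
  uint64_t Lhs = X << Amt;        // shl.b64 %lhs, $src, %amt2
  uint64_t Rhs = X >> (64 - Amt); // shr.b64 %rhs, $src, 64 - %amt2
  return Lhs + Rhs;               // add.u64: same bits as (Lhs | Rhs)
}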
+def ROTL64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "and.b32 \t%amt2, $amt, 63;\n\t" + "shl.b64 \t%lhs, $src, %amt2;\n\t" + "sub.u32 \t%amt2, 64, %amt2;\n\t" + "shr.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>; + +def ROTR64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "and.b32 \t%amt2, $amt, 63;\n\t" + "shr.b64 \t%lhs, $src, %amt2;\n\t" + "sub.u32 \t%amt2, 64, %amt2;\n\t" + "shl.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>; + +// +// Funnel shift in clamp mode +// + +// Create SDNodes so they can be used in the DAG code, e.g. +// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) +def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; +def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; + +def FUNSHFLCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFL_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>; + +def FUNSHFRCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFR_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>; // // BFE - bit-field extract @@ -3496,42 +3657,6 @@ def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -// -// Funnel-Shift -// - -// Create SDNodes so they can be used in the DAG code, e.g. -// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) -def fshl_clamp : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; -def fshr_clamp : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; - -// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so -// no side effects. -let hasSideEffects = false in { - multiclass ShfInst { - def _i - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), - "shf." # mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 imm:$amt)))]>, - Requires<[hasHWROT32]>; - - def _r - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf."
# mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>, - Requires<[hasHWROT32]>; - } - - defm SHF_L_CLAMP : ShfInst<"l.clamp", fshl_clamp>; - defm SHF_R_CLAMP : ShfInst<"r.clamp", fshr_clamp>; - defm SHF_L_WRAP : ShfInst<"l.wrap", fshl>; - defm SHF_R_WRAP : ShfInst<"r.wrap", fshr>; -} - // Count leading zeros let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 2688cfbe5e824..f22f0b368c9d5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1619,14 +1619,16 @@ multiclass F_ATOMIC_2_imp Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires; - def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, - Requires], Pred)>; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires; + def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, + Requires], Pred)>; + } } multiclass F_ATOMIC_2 Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat( - "{{ \n\t", - ".reg \t.s", TypeStr, " temp; \n\t", - "neg.s", TypeStr, " \ttemp, $b; \n\t", - "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", - "}}"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat( + "{{ \n\t", + ".reg \t.s", TypeStr, " temp; \n\t", + "neg.s", TypeStr, " \ttemp, $b; \n\t", + "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", + "}}"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires; + } } multiclass F_ATOMIC_2_NEG Pred = []> { @@ -1665,29 +1669,31 @@ multiclass F_ATOMIC_3_imp Pred> { - def reg : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, - Requires; - - def imm1 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, - Requires; - - def imm2 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT 
ptrclass:$addr), (regT regclass:$b), imm:$c))]>, - Requires; - - def imm3 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, - Requires; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, + Requires; + + def imm1 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, + Requires; + + def imm2 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, + Requires; + + def imm3 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, + Requires; + } } multiclass F_ATOMIC_3 Pred = []> { @@ -2733,9 +2739,134 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; +// rotate builtin support + +def ROTATE_B32_HW_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]> ; + +def ROTATE_B32_HW_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), + (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]> ; + +let hasSideEffects = false in { + def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + ".reg .b32 %dummy;\n\t", + "mov.b64 \t{$dst,%dummy}, $src;\n\t", + "}}"), + []> ; + + def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + ".reg .b32 %dummy;\n\t", + "mov.b64 \t{%dummy,$dst}, $src;\n\t", + "}}"), + []> ; +} + +let hasSideEffects = false in { + def PACK_TWO_INT32 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), + "mov.b64 \t$dst, {{$lo, $hi}};", []> ; +} + def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), - (V2I32toI64 (I64toI32H Int64Regs:$src), - (I64toI32L Int64Regs:$src))> ; + (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src))> ; + +// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so +// no side effects. 
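// Illustration, not part of the patch: the shf.l.wrap definitions and the
// rotate-64 patterns that follow split the value into 32-bit halves, funnel-
// shift each half against the other, and repack with mov.b64. A C++ model
// (hypothetical names); since .wrap takes the amount mod 32, this equals a
// true 64-bit rotate for amounts 0-31.
#include <cstdint>

static uint32_t shfLWrap(uint32_t A, uint32_t B, unsigned C) {
  C &= 31;                                   // .wrap: shift amount mod 32
  return C ? (B << C) | (A >> (32 - C)) : B; // d = (b << c) | (a >> (32 - c))
}

uint64_t rotl64ViaShf(uint64_t X, unsigned Amt) {
  uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  uint32_t NewLo = shfLWrap(Hi, Lo, Amt); // low word pulls in high bits
  uint32_t NewHi = shfLWrap(Lo, Hi, Amt); // high word pulls in low bits
  return ((uint64_t)NewHi << 32) | NewLo; // mov.b64 {lo, hi} repack
}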
+let hasSideEffects = false in { + def SHF_L_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_L_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_R_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_R_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; +} + +// HW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt), + (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt), + (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + +// SW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; + //----------------------------------- // Texture Intrinsics diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp index 92b90e2559154..a289d35f9b3f1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp @@ -110,17 +110,24 @@ StringRef NVPTXLowerUnreachable::getPassName() const { } // ============================================================================= -// Returns whether a `trap` intrinsic should be emitted before I. +// Returns whether a `trap` intrinsic would be emitted before I. // // This is a copy of the logic in SelectionDAGBuilder::visitUnreachable(). 
// ============================================================================= bool NVPTXLowerUnreachable::isLoweredToTrap(const UnreachableInst &I) const { - if (!TrapUnreachable) - return false; - if (!NoTrapAfterNoreturn) - return true; - const CallInst *Call = dyn_cast_or_null(I.getPrevNode()); - return Call && Call->doesNotReturn(); + if (const auto *Call = dyn_cast_or_null(I.getPrevNode())) { + // We've already emitted a non-continuable trap. + if (Call->isNonContinuableTrap()) + return true; + + // No traps are emitted for calls that do not return + // when this option is enabled. + if (NoTrapAfterNoreturn && Call->doesNotReturn()) + return false; + } + + // In all other cases, we will generate a trap if TrapUnreachable is set. + return TrapUnreachable; } // ============================================================================= diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 80361744fd5b6..be1c87d07f4de 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -13,6 +13,7 @@ #include "NVPTXUtilities.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -130,8 +131,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { } } -bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, - unsigned &retval) { +static std::optional findOneNVVMAnnotation(const GlobalValue *gv, + const std::string &prop) { auto &AC = getAnnotationCache(); std::lock_guard Guard(AC.Lock); const Module *m = gv->getParent(); @@ -140,21 +141,13 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, else if (AC.Cache[m].find(gv) == AC.Cache[m].end()) cacheAnnotationFromMD(m, gv); if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end()) - return false; - retval = AC.Cache[m][gv][prop][0]; - return true; -} - -static std::optional -findOneNVVMAnnotation(const GlobalValue &GV, const std::string &PropName) { - unsigned RetVal; - if (findOneNVVMAnnotation(&GV, PropName, RetVal)) - return RetVal; - return std::nullopt; + return std::nullopt; + return AC.Cache[m][gv][prop][0]; } -bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, - std::vector &retval) { +static bool findAllNVVMAnnotation(const GlobalValue *gv, + const std::string &prop, + std::vector &retval) { auto &AC = getAnnotationCache(); std::lock_guard Guard(AC.Lock); const Module *m = gv->getParent(); @@ -168,25 +161,13 @@ bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, return true; } -bool isTexture(const Value &val) { - if (const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "texture", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a texture symbol"); +static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) { + if (const auto *GV = dyn_cast(&V)) + if (const auto Annot = findOneNVVMAnnotation(GV, Prop)) { + assert((*Annot == 1) && "Unexpected annotation on a symbol"); return true; } - } - return false; -} -bool isSurface(const Value &val) { - if (const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "surface", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a surface symbol"); - return true; - } - } return false; } @@ -220,71 +201,60 @@ bool isParamGridConstant(const Value &V) { return 
false; } -bool isSampler(const Value &val) { +bool isTexture(const Value &V) { return globalHasNVVMAnnotation(V, "texture"); } + +bool isSurface(const Value &V) { return globalHasNVVMAnnotation(V, "surface"); } + +bool isSampler(const Value &V) { const char *AnnotationName = "sampler"; - if (const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, AnnotationName, Annot)) { - assert((Annot == 1) && "Unexpected annotation on a sampler symbol"); - return true; - } - } - return argHasNVVMAnnotation(val, AnnotationName); + return globalHasNVVMAnnotation(V, AnnotationName) || + argHasNVVMAnnotation(V, AnnotationName); } -bool isImageReadOnly(const Value &val) { - return argHasNVVMAnnotation(val, "rdoimage"); +bool isImageReadOnly(const Value &V) { + return argHasNVVMAnnotation(V, "rdoimage"); } -bool isImageWriteOnly(const Value &val) { - return argHasNVVMAnnotation(val, "wroimage"); +bool isImageWriteOnly(const Value &V) { + return argHasNVVMAnnotation(V, "wroimage"); } -bool isImageReadWrite(const Value &val) { - return argHasNVVMAnnotation(val, "rdwrimage"); +bool isImageReadWrite(const Value &V) { + return argHasNVVMAnnotation(V, "rdwrimage"); } -bool isImage(const Value &val) { - return isImageReadOnly(val) || isImageWriteOnly(val) || isImageReadWrite(val); +bool isImage(const Value &V) { + return isImageReadOnly(V) || isImageWriteOnly(V) || isImageReadWrite(V); } -bool isManaged(const Value &val) { - if(const GlobalValue *gv = dyn_cast(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "managed", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a managed symbol"); - return true; - } - } - return false; -} +bool isManaged(const Value &V) { return globalHasNVVMAnnotation(V, "managed"); } -std::string getTextureName(const Value &val) { - assert(val.hasName() && "Found texture variable with no name"); - return std::string(val.getName()); +StringRef getTextureName(const Value &V) { + assert(V.hasName() && "Found texture variable with no name"); + return V.getName(); } -std::string getSurfaceName(const Value &val) { - assert(val.hasName() && "Found surface variable with no name"); - return std::string(val.getName()); +StringRef getSurfaceName(const Value &V) { + assert(V.hasName() && "Found surface variable with no name"); + return V.getName(); } -std::string getSamplerName(const Value &val) { - assert(val.hasName() && "Found sampler variable with no name"); - return std::string(val.getName()); +StringRef getSamplerName(const Value &V) { + assert(V.hasName() && "Found sampler variable with no name"); + return V.getName(); } std::optional getMaxNTIDx(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidx"); + return findOneNVVMAnnotation(&F, "maxntidx"); } std::optional getMaxNTIDy(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidy"); + return findOneNVVMAnnotation(&F, "maxntidy"); } std::optional getMaxNTIDz(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidz"); + return findOneNVVMAnnotation(&F, "maxntidz"); } std::optional getMaxNTID(const Function &F) { @@ -302,20 +272,20 @@ std::optional getMaxNTID(const Function &F) { return std::nullopt; } -bool getMaxClusterRank(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxclusterrank", x); +std::optional getMaxClusterRank(const Function &F) { + return findOneNVVMAnnotation(&F, "maxclusterrank"); } std::optional getReqNTIDx(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidx"); + return findOneNVVMAnnotation(&F, "reqntidx"); 
} std::optional getReqNTIDy(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidy"); + return findOneNVVMAnnotation(&F, "reqntidy"); } std::optional getReqNTIDz(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidz"); + return findOneNVVMAnnotation(&F, "reqntidz"); } std::optional getReqNTID(const Function &F) { @@ -328,21 +298,20 @@ std::optional getReqNTID(const Function &F) { return std::nullopt; } -bool getMinCTASm(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "minctasm", x); +std::optional getMinCTASm(const Function &F) { + return findOneNVVMAnnotation(&F, "minctasm"); } -bool getMaxNReg(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxnreg", x); +std::optional getMaxNReg(const Function &F) { + return findOneNVVMAnnotation(&F, "maxnreg"); } bool isKernelFunction(const Function &F) { - unsigned x = 0; - if (!findOneNVVMAnnotation(&F, "kernel", x)) { - // There is no NVVM metadata, check the calling convention - return F.getCallingConv() == CallingConv::PTX_Kernel; - } - return (x == 1); + if (const auto X = findOneNVVMAnnotation(&F, "kernel")) + return (*X == 1); + + // There is no NVVM metadata, check the calling convention + return F.getCallingConv() == CallingConv::PTX_Kernel; } MaybeAlign getAlign(const Function &F, unsigned Index) { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index 938b9b04b7a44..cf15dff85cbde 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -32,11 +32,6 @@ class TargetMachine; void clearAnnotationCache(const Module *); -bool findOneNVVMAnnotation(const GlobalValue *, const std::string &, - unsigned &); -bool findAllNVVMAnnotation(const GlobalValue *, const std::string &, - std::vector &); - bool isTexture(const Value &); bool isSurface(const Value &); bool isSampler(const Value &); @@ -46,23 +41,23 @@ bool isImageWriteOnly(const Value &); bool isImageReadWrite(const Value &); bool isManaged(const Value &); -std::string getTextureName(const Value &); -std::string getSurfaceName(const Value &); -std::string getSamplerName(const Value &); +StringRef getTextureName(const Value &); +StringRef getSurfaceName(const Value &); +StringRef getSamplerName(const Value &); std::optional getMaxNTIDx(const Function &); std::optional getMaxNTIDy(const Function &); std::optional getMaxNTIDz(const Function &); -std::optional getMaxNTID(const Function &F); +std::optional getMaxNTID(const Function &); std::optional getReqNTIDx(const Function &); std::optional getReqNTIDy(const Function &); std::optional getReqNTIDz(const Function &); std::optional getReqNTID(const Function &); -bool getMaxClusterRank(const Function &, unsigned &); -bool getMinCTASm(const Function &, unsigned &); -bool getMaxNReg(const Function &, unsigned &); +std::optional getMaxClusterRank(const Function &); +std::optional getMinCTASm(const Function &); +std::optional getMaxNReg(const Function &); bool isKernelFunction(const Function &); bool isParamGridConstant(const Value &); @@ -75,10 +70,9 @@ Function *getMaybeBitcastedCallee(const CallBase *CB); inline unsigned promoteScalarArgumentSize(unsigned size) { if (size <= 32) return 32; - else if (size <= 64) + if (size <= 64) return 64; - else - return size; + return size; } bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b7bdbeb535d52..ec3d3dbc8f6aa 100644 
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -655,18 +655,17 @@ InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return Cost; } -InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost PPCTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, ValTy, nullptr); if (!CostFactor.isValid()) return InstructionCost::getMax(); - InstructionCost Cost = - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + InstructionCost Cost = BaseT::getCmpSelInstrCost( + Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return Cost; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 126ccb2b3096e..3cb60d7a1785a 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -118,10 +118,12 @@ class PPCTTIImpl : public BasicTTIImplBase { const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 52f5a637eb740..3d0e1dae801d3 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -243,8 +243,8 @@ def HasStdExtZabha : Predicate<"Subtarget->hasStdExtZabha()">, "'Zabha' (Byte and Halfword Atomic Memory Operations)">; def FeatureStdExtZacas - : RISCVExperimentalExtension<"zacas", 1, 0, - "'Zacas' (Atomic Compare-And-Swap Instructions)">, + : RISCVExtension<"zacas", 1, 0, + "'Zacas' (Atomic Compare-And-Swap Instructions)">, RISCVExtensionBitmask<0, 26>; def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, AssemblerPredicate<(all_of FeatureStdExtZacas), diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7abd5a49a1b5f..22824b77c37dd 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1090,11 +1090,12 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { for (int FI : ObjectsToAllocate) { // ObjectSize in bytes. 
int64_t ObjectSize = MFI.getObjectSize(FI); - auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI)); + auto ObjectAlign = + std::max(Align(RISCV::RVVBitsPerBlock / 8), MFI.getObjectAlign(FI)); // If the data type is the fractional vector type, reserve one vector // register for it. - if (ObjectSize < 8) - ObjectSize = 8; + if (ObjectSize < (RISCV::RVVBitsPerBlock / 8)) + ObjectSize = (RISCV::RVVBitsPerBlock / 8); Offset = alignTo(Offset + ObjectSize, ObjectAlign); MFI.setObjectOffset(FI, -Offset); // Update the maximum alignment of the RVV stack section diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b998a1eb11c30..3b61cb5dfe090 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1082,10 +1082,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT, + Custom); else - setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom); - setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, + EltVT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, + ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD, + ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, + ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, + ISD::VP_SCATTER}, + VT, Custom); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); @@ -4449,11 +4456,27 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, bool HasPassthru = Passthru && !Passthru.isUndef(); if (!HasPassthru && !Passthru) Passthru = DAG.getUNDEF(VT); - if (VT.isFloatingPoint()) - return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); + MVT EltVT = VT.getVectorElementType(); MVT XLenVT = Subtarget.getXLenVT(); + if (VT.isFloatingPoint()) { + if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || + EltVT == MVT::bf16) { + if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) || + (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) + Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar); + else + Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar); + MVT IVT = VT.changeVectorElementType(MVT::i16); + Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru); + SDValue Splat = + lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget); + return DAG.getNode(ISD::BITCAST, DL, VT, Splat); + } + return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); + } + // Simplest case is that the operand needs to be promoted to XLenVT. if (Scalar.getValueType().bitsLE(XLenVT)) { // If the operand is a constant, sign extend to increase our chances @@ -10152,13 +10175,15 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, unsigned OrigIdx = Op.getConstantOperandVal(2); const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); + if (OrigIdx == 0 && Vec.isUndef()) + return Op; + // We don't have the ability to slide mask vectors up indexed by their i1 // elements; the smallest we can do is i8. Often we are able to bitcast to // equivalent i8 vectors. Note that when inserting a fixed-length vector // into a scalable one, we might not necessarily have enough scalable // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 
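// Illustration, not part of the patch: why everything is re-indexed by 8
// when the code below bitcasts mask vectors to i8 vectors. Eight i1 mask
// elements share one byte of storage, so element counts and the insert
// position all scale down together (hypothetical helper):
#include <cassert>

unsigned maskIndexAsBytes(unsigned I1Index) {
  assert(I1Index % 8 == 0 && "only whole-byte positions can be re-indexed");
  return I1Index / 8; // an insert at i1 element I becomes an i8 insert at I/8
}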
- if (SubVecVT.getVectorElementType() == MVT::i1 && - (OrigIdx != 0 || !Vec.isUndef())) { + if (SubVecVT.getVectorElementType() == MVT::i1) { if (VecVT.getVectorMinNumElements() >= 8 && SubVecVT.getVectorMinNumElements() >= 8) { assert(OrigIdx % 8 == 0 && "Invalid index"); @@ -10196,8 +10221,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, // vector group up the full amount. const auto VLen = Subtarget.getRealVLen(); if (SubVecVT.isFixedLengthVector() && !VLen) { - if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) - return Op; MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); @@ -10208,11 +10231,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, DAG.getUNDEF(ContainerVT), SubVec, DAG.getVectorIdxConstant(0, DL)); - if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { - SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); - return DAG.getBitcast(Op.getValueType(), SubVec); - } - SDValue Mask = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; // Set the vector length to only the number of elements we care about. Note @@ -10304,8 +10322,12 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, return Op; } + // Use an insert_subvector that will resolve to an insert subreg. + assert(VLen); + unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; SDValue Insert = - DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec); + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec, + DAG.getConstant(OrigIdx / Vscale, DL, XLenVT)); if (VecVT.isFixedLengthVector()) Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget); return Insert; @@ -10321,8 +10343,10 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, MVT InterSubVT = ContainerVecVT; SDValue AlignedExtract = Vec; unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue(); - if (SubVecVT.isFixedLengthVector()) + if (SubVecVT.isFixedLengthVector()) { + assert(VLen); AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock; + } if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) { InterSubVT = getLMUL1VT(ContainerVecVT); // Extract a subvector equal to the nearest full vector register type. This @@ -10499,10 +10523,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, // If the Idx has been completely eliminated then this is a subvector extract // which naturally aligns to a vector register. These can easily be handled - // using subregister manipulation. + // using subregister manipulation. We use an extract_subvector that will + // resolve to an extract subreg. if (RemIdx.isZero()) { if (SubVecVT.isFixedLengthVector()) { - Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec); + assert(VLen); + unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec, + DAG.getConstant(OrigIdx / Vscale, DL, XLenVT)); return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget); } return Op; @@ -10520,9 +10548,17 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, if (VecVT.bitsGT(getLMUL1VT(VecVT))) { // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and // we should have successfully decomposed the extract into a subregister. + // We use an extract_subvector that will resolve to a subreg extract.
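// Illustration, not part of the patch: how a fixed-vector position becomes a
// scalable INSERT/EXTRACT_SUBVECTOR index once the exact VLEN is known. One
// block is RISCV::RVVBitsPerBlock (64) bits, so vscale = VLEN / 64 and the
// index shrinks by that factor, mirroring the OrigIdx / Vscale computations
// nearby (hypothetical helper):
unsigned fixedToScalableIdx(unsigned FixedIdx, unsigned VLenBits) {
  unsigned Vscale = VLenBits / 64; // e.g. VLEN = 128 gives vscale = 2
  return FixedIdx / Vscale;
}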
assert(SubRegIdx != RISCV::NoSubRegister); + (void)SubRegIdx; + unsigned Idx = OrigIdx - RemIdx.getKnownMinValue(); + if (SubVecVT.isFixedLengthVector()) { + assert(VLen); + Idx /= *VLen / RISCV::RVVBitsPerBlock; + } InterSubVT = getLMUL1VT(VecVT); - Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, + DAG.getConstant(Idx, DL, XLenVT)); } // Slide this vector register down by the desired number of elements in order @@ -16971,6 +17007,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Op0.getOperand(0); } + if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() && + cast(Op0)->isSimple()) { + MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits()); + auto *LN0 = cast(Op0); + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), + LN0->getBasePtr(), IVT, LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1)); + return Load; + } + // This is a target-specific version of a DAGCombine performed in // DAGCombiner::visitBITCAST. It performs the equivalent of: // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 41f93fde17d32..8dafd824963c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -736,8 +736,6 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, VirtRegMap *VRM) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - // The below optimizations narrow the load so they are only valid for little // endian. // TODO: Support big endian by adding an offset into the frame object? 
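// Illustration, not part of the patch: why this fold is little-endian only.
// A narrowed reload must read the value's least-significant bytes; on LE
// they sit at offset 0 of the stack slot, on BE at the end of it, which is
// the offset adjustment the TODO above refers to. Hypothetical helper:
#include <cstddef>

std::size_t narrowedReloadOffset(std::size_t SlotBytes, std::size_t LoadBytes,
                                 bool IsBigEndian) {
  return IsBigEndian ? SlotBytes - LoadBytes : 0; // LE: offset 0 always works
}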
@@ -763,6 +761,29 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( LoadOpc = RISCV::LBU; break; } + if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) { + unsigned Log2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + if (STI.getXLen() < (1 << Log2SEW)) + return nullptr; + switch (Log2SEW) { + case 3: + LoadOpc = RISCV::LB; + break; + case 4: + LoadOpc = RISCV::LH; + break; + case 5: + LoadOpc = RISCV::LW; + break; + case 6: + LoadOpc = RISCV::LD; + break; + default: + llvm_unreachable("Unexpected SEW"); + } + break; + } return nullptr; case RISCV::SEXT_H: LoadOpc = RISCV::LH; @@ -776,17 +797,11 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( break; } - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, FrameIndex), - MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), - MFI.getObjectAlign(FrameIndex)); - Register DstReg = MI.getOperand(0).getReg(); return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc), DstReg) .addFrameIndex(FrameIndex) - .addImm(0) - .addMemOperand(MMO); + .addImm(0); } void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 701594c0fb05d..91d539a355ac2 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -200,11 +200,11 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); assert(ScalableValue > 0 && "There is no need to get VLEN scaled value."); - assert(ScalableValue % 8 == 0 && + assert(ScalableValue % (RISCV::RVVBitsPerBlock / 8) == 0 && "Reserve the stack by the multiple of one vector size."); - assert(isInt<32>(ScalableValue / 8) && + assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) && "Expect the number of vector registers within 32-bits."); - uint32_t NumOfVReg = ScalableValue / 8; + uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg) .setMIFlag(Flag); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 768df71715fa6..e041854ee8fd6 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1618,23 +1618,38 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } -InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost RISCVTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); if (isa(ValTy) && !ST->useRVVForFixedLengthVectors()) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Skip if scalar size of ValTy is bigger than ELEN. 
   if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
-                                     I);
+                                     Op1Info, Op2Info, I);
+
+  auto GetConstantMatCost =
+      [&](TTI::OperandValueInfo OpInfo) -> InstructionCost {
+    if (OpInfo.isUniform())
+      // We return 0 because we currently ignore the cost of materializing
+      // scalar constants in GPRs.
+      return 0;
+
+    return getConstantPoolLoadCost(ValTy, CostKind);
+  };
+
+  InstructionCost ConstantMatCost;
+  if (Op1Info.isConstant())
+    ConstantMatCost += GetConstantMatCost(Op1Info);
+  if (Op2Info.isConstant())
+    ConstantMatCost += GetConstantMatCost(Op2Info);
 
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
   if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
@@ -1643,14 +1658,16 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       // vmandn.mm v8, v8, v9
       // vmand.mm v9, v0, v9
       // vmor.mm v0, v9, v8
-      return LT.first *
-             getRISCVInstructionCost(
-                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
-                 LT.second, CostKind);
+      return ConstantMatCost +
+             LT.first *
+                 getRISCVInstructionCost(
+                     {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
+                     LT.second, CostKind);
     }
     // vselect and max/min are supported natively.
-    return LT.first *
-           getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
+    return ConstantMatCost +
+           LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
+                                              CostKind);
   }
 
   if (ValTy->getScalarSizeInBits() == 1) {
@@ -1660,7 +1677,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     // vmand.mm v9, v0, v9
     // vmor.mm v0, v9, v8
     MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
-    return LT.first *
+    return ConstantMatCost +
+           LT.first *
                getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
                                        InterimVT, CostKind) +
            LT.first * getRISCVInstructionCost(
@@ -1671,7 +1689,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   // vmv.v.x v10, a0
   // vmsne.vi v0, v10, 0
   // vmerge.vvm v8, v9, v8, v0
-  return LT.first * getRISCVInstructionCost(
+  return ConstantMatCost +
+         LT.first * getRISCVInstructionCost(
                         {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
                         LT.second, CostKind);
 }
@@ -1680,8 +1699,9 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       CmpInst::isIntPredicate(VecPred)) {
     // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, VMSLE
     // provided they incur the same cost across all implementations
-    return LT.first *
-           getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind);
+    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
+                                                                LT.second,
+                                                                CostKind);
   }
 
   if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
@@ -1689,7 +1709,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     // Use VMXOR_MM and VMXNOR_MM to generate all true/false mask
     if ((VecPred == CmpInst::FCMP_FALSE) ||
         (VecPred == CmpInst::FCMP_TRUE))
-      return getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
+      return ConstantMatCost +
+             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
 
     // If we do not support the input floating point vector type, use the base
     // one which will calculate as:
@@ -1699,7 +1720,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
         (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
         (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
       return BaseT::getCmpSelInstrCost(Opcode, ValTy,
CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Assuming vector fp compare and mask instructions are all the same cost // until a need arises to differentiate them. @@ -1708,7 +1729,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_ORD: // vmfeq.vv + vmfeq.vv + vmand.mm case CmpInst::FCMP_UNO: // vmfne.vv + vmfne.vv + vmor.mm case CmpInst::FCMP_UEQ: // vmflt.vv + vmflt.vv + vmnor.mm - return LT.first * getRISCVInstructionCost( + return ConstantMatCost + + LT.first * getRISCVInstructionCost( {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM}, LT.second, CostKind); @@ -1716,9 +1738,10 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_UGE: // vmflt.vv + vmnot.m case CmpInst::FCMP_ULT: // vmfle.vv + vmnot.m case CmpInst::FCMP_ULE: // vmflt.vv + vmnot.m - return LT.first * - getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM}, - LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM}, + LT.second, CostKind); case CmpInst::FCMP_OEQ: // vmfeq.vv case CmpInst::FCMP_OGT: // vmflt.vv @@ -1726,8 +1749,9 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_OLT: // vmflt.vv case CmpInst::FCMP_OLE: // vmfle.vv case CmpInst::FCMP_UNE: // vmfne.vv - return LT.first * - getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind); default: break; } @@ -1750,7 +1774,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // TODO: Add cost for scalar type. - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode, @@ -1908,6 +1933,29 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Args, CxtI); + // f16 with zvfhmin and bf16 will be promoted to f32. + // FIXME: nxv32[b]f16 will be custom lowered and split. 
+ unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + InstructionCost CastCost = 0; + if ((LT.second.getVectorElementType() == MVT::f16 || + LT.second.getVectorElementType() == MVT::bf16) && + TLI->getOperationAction(ISDOpcode, LT.second) == + TargetLoweringBase::LegalizeAction::Promote) { + MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second); + Type *PromotedTy = EVT(PromotedVT).getTypeForEVT(Ty->getContext()); + Type *LegalTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); + // Add cost of extending arguments + CastCost += LT.first * Args.size() * + getCastInstrCost(Instruction::FPExt, PromotedTy, LegalTy, + TTI::CastContextHint::None, CostKind); + // Add cost of truncating result + CastCost += + LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy, + TTI::CastContextHint::None, CostKind); + // Compute cost of op in promoted type + LT.second = PromotedVT; + } + auto getConstantMatCost = [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost { if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand)) @@ -1929,7 +1977,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( ConstantMatCost += getConstantMatCost(1, Op2Info); unsigned Op; - switch (TLI->InstructionOpcodeToISD(Opcode)) { + switch (ISDOpcode) { case ISD::ADD: case ISD::SUB: Op = RISCV::VADD_VV; @@ -1959,11 +2007,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( break; case ISD::FADD: case ISD::FSUB: - // TODO: Address FP16 with VFHMIN Op = RISCV::VFADD_VV; break; case ISD::FMUL: - // TODO: Address FP16 with VFHMIN Op = RISCV::VFMUL_VV; break; case ISD::FDIV: @@ -1975,9 +2021,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( default: // Assuming all other instructions have the same cost until a need arises to // differentiate them. - return ConstantMatCost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, - Op1Info, Op2Info, - Args, CxtI); + return CastCost + ConstantMatCost + + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Args, CxtI); } InstructionCost InstrCost = getRISCVInstructionCost(Op, LT.second, CostKind); @@ -1986,7 +2032,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( // scalar floating point ops aren't cheaper than their vector equivalents. if (Ty->isFPOrFPVectorTy()) InstrCost *= 2; - return ConstantMatCost + LT.first * InstrCost; + return CastCost + ConstantMatCost + LT.first * InstrCost; } // TODO: Deduplicate from TargetTransformInfoImplCRTPBase. 
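To see how the new cost terms in the RISCVTargetTransformInfo hunks above compose, here is a hedged sketch; every unit cost below is an assumption for illustration, not a real RISC-V TTI value:

  #include <cstdint>

  // total = CastCost (promotion only) + ConstantMatCost + parts * op cost.
  uint64_t arithmeticCostSketch(bool ElemTypePromoted, uint64_t NumArgs,
                                bool HasNonUniformConstOperand,
                                uint64_t NumLegalizedParts) {
    const uint64_t FpExtCost = 1, FpTruncCost = 1; // assumed cast costs
    const uint64_t PoolLoadCost = 1;               // assumed constant-pool load
    const uint64_t VectorOpCost = 2;               // assumed (fp ops doubled)

    // Promotion (e.g. bf16 -> f32): extend every argument and truncate the
    // result, once per legalized part.
    uint64_t CastCost =
        ElemTypePromoted
            ? NumLegalizedParts * (NumArgs * FpExtCost + FpTruncCost)
            : 0;
    // Uniform (splattable) constants are treated as free; other constant
    // operands pay an assumed constant-pool load.
    uint64_t ConstantMatCost = HasNonUniformConstOperand ? PoolLoadCost : 0;
    return CastCost + ConstantMatCost + NumLegalizedParts * VectorOpCost;
  }

For example, a bf16 fadd with one non-splattable constant operand that legalizes into a single part comes out as 1*(2*1 + 1) + 1 + 1*2 = 6 under these assumed unit costs.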
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index f16c4fc0eed02..65bbd90550855 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -200,10 +200,12 @@ class RISCVTTIImpl : public BasicTTIImplBase { TTI::OperandValueInfo OpdInfo = {TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 795ddf47c40da..86be79cbb5e7f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -1351,7 +1351,8 @@ Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { SmallVector Args; for (auto &Op : I.operands()) Args.push_back(Op); - Args.push_back(B.getInt32(I.getSyncScopeID())); + Args.push_back(B.getInt32( + static_cast(getMemScope(I.getContext(), I.getSyncScopeID())))); Args.push_back(B.getInt32( static_cast(getMemSemantics(I.getSuccessOrdering())))); Args.push_back(B.getInt32( diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 7af92b87ce00c..e475810f92f71 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -33,27 +33,6 @@ #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Support/Debug.h" -namespace { - -struct SyncScopeIDs { - llvm::SyncScope::ID Work_ItemSSID; - llvm::SyncScope::ID WorkGroupSSID; - llvm::SyncScope::ID DeviceSSID; - llvm::SyncScope::ID AllSVMDevicesSSID; - llvm::SyncScope::ID SubGroupSSID; - - SyncScopeIDs() {} - SyncScopeIDs(llvm::LLVMContext &Context) { - Work_ItemSSID = Context.getOrInsertSyncScopeID("work_item"); - WorkGroupSSID = Context.getOrInsertSyncScopeID("workgroup"); - DeviceSSID = Context.getOrInsertSyncScopeID("device"); - AllSVMDevicesSSID = Context.getOrInsertSyncScopeID("all_svm_devices"); - SubGroupSSID = Context.getOrInsertSyncScopeID("sub_group"); - } -}; - -} // namespace - #define DEBUG_TYPE "spirv-isel" using namespace llvm; @@ -76,7 +55,6 @@ class SPIRVInstructionSelector : public InstructionSelector { const RegisterBankInfo &RBI; SPIRVGlobalRegistry &GR; MachineRegisterInfo *MRI; - SyncScopeIDs SSIDs; MachineFunction *HasVRegsReset = nullptr; /// We need to keep track of the number we give to anonymous global values to @@ -305,7 +283,6 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - SSIDs = SyncScopeIDs(MF.getFunction().getContext()); MRI = &MF.getRegInfo(); GR.setCurrentFunc(MF); InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); @@ -845,29 +822,6 @@ bool SPIRVInstructionSelector::selectBitcast(Register ResVReg, return selectUnOp(ResVReg, ResType, I, 
SPIRV::OpBitcast); } -static SPIRV::Scope::Scope getScope(SyncScope::ID Ord, - const SyncScopeIDs &SSIDs) { - if (Ord == SyncScope::SingleThread || Ord == SSIDs.Work_ItemSSID) - return SPIRV::Scope::Invocation; - else if (Ord == SyncScope::System || Ord == SSIDs.DeviceSSID) - return SPIRV::Scope::Device; - else if (Ord == SSIDs.WorkGroupSSID) - return SPIRV::Scope::Workgroup; - else if (Ord == SSIDs.AllSVMDevicesSSID) - return SPIRV::Scope::CrossDevice; - else if (Ord == SSIDs.SubGroupSSID) - return SPIRV::Scope::Subgroup; - else - // OpenCL approach is: "The functions that do not have memory_scope argument - // have the same semantics as the corresponding functions with the - // memory_scope argument set to memory_scope_device." See ref.: // - // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions - // In our case if the scope is unknown, assuming that SPIR-V code is to be - // consumed in an OpenCL environment, we use the same approach and set the - // scope to memory_scope_device. - return SPIRV::Scope::Device; -} - static void addMemoryOperands(MachineMemOperand *MemOp, MachineInstrBuilder &MIB) { uint32_t SpvMemOp = static_cast(SPIRV::MemoryOperand::None); @@ -1020,8 +974,8 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg, unsigned NegateOpcode) const { assert(I.hasOneMemOperand()); const MachineMemOperand *MemOp = *I.memoperands_begin(); - uint32_t Scope = - static_cast(getScope(MemOp->getSyncScopeID(), SSIDs)); + uint32_t Scope = static_cast(getMemScope( + GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID())); Register ScopeReg = buildI32Constant(Scope, I); Register Ptr = I.getOperand(1).getReg(); @@ -1092,7 +1046,8 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const { uint32_t MemSem = static_cast(getMemSemantics(AO)); Register MemSemReg = buildI32Constant(MemSem, I); SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm()); - uint32_t Scope = static_cast(getScope(Ord, SSIDs)); + uint32_t Scope = static_cast( + getMemScope(GR.CurMF->getFunction().getContext(), Ord)); Register ScopeReg = buildI32Constant(Scope, I); MachineBasicBlock &BB = *I.getParent(); return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier)) @@ -1111,8 +1066,8 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg, if (!isa(I)) { assert(I.hasOneMemOperand()); const MachineMemOperand *MemOp = *I.memoperands_begin(); - unsigned Scope = - static_cast(getScope(MemOp->getSyncScopeID(), SSIDs)); + unsigned Scope = static_cast(getMemScope( + GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID())); ScopeReg = buildI32Constant(Scope, I); unsigned ScSem = static_cast( diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 53601e402c737..a8016d42b0154 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -253,6 +253,32 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) { llvm_unreachable(nullptr); } +SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { + // Named by + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. + // We don't need aliases for Invocation and CrossDevice, as we already have + // them covered by "singlethread" and "" strings respectively (see + // implementation of LLVMContext::LLVMContext()). 
+ static const llvm::SyncScope::ID SubGroup = + Ctx.getOrInsertSyncScopeID("subgroup"); + static const llvm::SyncScope::ID WorkGroup = + Ctx.getOrInsertSyncScopeID("workgroup"); + static const llvm::SyncScope::ID Device = + Ctx.getOrInsertSyncScopeID("device"); + + if (Id == llvm::SyncScope::SingleThread) + return SPIRV::Scope::Invocation; + else if (Id == llvm::SyncScope::System) + return SPIRV::Scope::CrossDevice; + else if (Id == SubGroup) + return SPIRV::Scope::Subgroup; + else if (Id == WorkGroup) + return SPIRV::Scope::Workgroup; + else if (Id == Device) + return SPIRV::Scope::Device; + return SPIRV::Scope::CrossDevice; +} + MachineInstr *getDefInstrMaybeConstant(Register &ConstReg, const MachineRegisterInfo *MRI) { MachineInstr *MI = MRI->getVRegDef(ConstReg); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 93d64a7f435e9..7c7616000d22b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -145,6 +145,8 @@ getMemSemanticsForStorageClass(SPIRV::StorageClass::StorageClass SC); SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord); +SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id); + // Find def instruction for the given ConstReg, walking through // spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def // of OpConstant instruction. diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3dabc5ef540cf..ba105c12bc4e9 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -7361,8 +7361,9 @@ SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( if (auto *IndexN = dyn_cast(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); EVT VecVT = Op0.getValueType(); - return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, - IndexN->getZExtValue(), DCI, false); + if (canTreatAsByteVector(VecVT)) + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); } return SDValue(); } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 3cd1e05aa5d18..e44777c5c4857 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -959,13 +959,13 @@ static unsigned getOperandsExtensionCost(const Instruction *I) { return ExtCost; } -InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info); if (!ValTy->isVectorTy()) { switch (Opcode) { @@ -1041,7 +1041,8 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info); } InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned 
Opcode, Type *Val, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 9294fada1eb77..e221200cfa08c 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -104,10 +104,12 @@ class SystemZTTIImpl : public BasicTTIImplBase { TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index 4ff19a9cacfb3..8b1e1dca4f847 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -70,14 +70,9 @@ void WebAssemblyAsmTypeCheck::dumpTypeStack(Twine Msg) { } bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) { - // Once you get one type error in a function, it will likely trigger more - // which are mostly not helpful. - if (TypeErrorThisFunction) - return true; // If we're currently in unreachable code, we suppress errors completely. if (Unreachable) return false; - TypeErrorThisFunction = true; dumpTypeStack("current stack: "); return Parser.Error(ErrorLoc, Msg); } @@ -171,11 +166,11 @@ bool WebAssemblyAsmTypeCheck::checkEnd(SMLoc ErrorLoc, bool PopVals) { bool WebAssemblyAsmTypeCheck::checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig) { + bool Error = false; for (auto VT : llvm::reverse(Sig.Params)) - if (popType(ErrorLoc, VT)) - return true; + Error |= popType(ErrorLoc, VT); Stack.insert(Stack.end(), Sig.Returns.begin(), Sig.Returns.end()); - return false; + return Error; } bool WebAssemblyAsmTypeCheck::getSymRef(SMLoc ErrorLoc, const MCOperand &SymOp, @@ -260,17 +255,16 @@ bool WebAssemblyAsmTypeCheck::getSignature(SMLoc ErrorLoc, } bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { + bool Error = false; // Check the return types. 
- for (auto RVT : llvm::reverse(ReturnTypes)) { - if (popType(ErrorLoc, RVT)) - return true; - } + for (auto RVT : llvm::reverse(ReturnTypes)) + Error |= popType(ErrorLoc, RVT); if (!Stack.empty()) { return typeError(ErrorLoc, std::to_string(Stack.size()) + " superfluous return values"); } Unreachable = true; - return false; + return Error; } bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, @@ -279,179 +273,221 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, auto Name = getMnemonic(Opc); dumpTypeStack("typechecking " + Name + ": "); wasm::ValType Type; + if (Name == "local.get") { - if (getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - Stack.push_back(Type); - } else if (Name == "local.set") { - if (getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "local.tee") { - if (getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - Stack.push_back(Type); - } else if (Name == "global.get") { - if (getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - Stack.push_back(Type); - } else if (Name == "global.set") { - if (getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "table.get") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - Stack.push_back(Type); - } else if (Name == "table.set") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - } else if (Name == "table.size") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; + if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + Stack.push_back(Type); + return false; + } + return true; + } + + if (Name == "local.set") { + if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + return popType(ErrorLoc, Type); + return true; + } + + if (Name == "local.tee") { + if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + bool Error = popType(ErrorLoc, Type); + Stack.push_back(Type); + return Error; + } + return true; + } + + if (Name == "global.get") { + if (!getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + Stack.push_back(Type); + return false; + } + return true; + } + + if (Name == "global.set") { + if (!getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + return popType(ErrorLoc, Type); + return true; + } + + if (Name == "table.get") { + bool Error = popType(ErrorLoc, wasm::ValType::I32); + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + Stack.push_back(Type); + return Error; + } + return true; + } + + if (Name == "table.set") { + bool Error = false; + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + Error |= popType(ErrorLoc, Type); + else + Error = true; + Error |= popType(ErrorLoc, wasm::ValType::I32); + return Error; + } + + if (Name == "table.size") { + bool Error = getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type); Stack.push_back(wasm::ValType::I32); - } else if (Name == "table.grow") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return 
true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; + return Error; + } + + if (Name == "table.grow") { + bool Error = popType(ErrorLoc, wasm::ValType::I32); + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + Error |= popType(ErrorLoc, Type); + else + Error = true; Stack.push_back(wasm::ValType::I32); - } else if (Name == "table.fill") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - } else if (Name == "memory.fill") { + return Error; + } + + if (Name == "table.fill") { + bool Error = popType(ErrorLoc, wasm::ValType::I32); + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + Error |= popType(ErrorLoc, Type); + else + Error = true; + Error |= popType(ErrorLoc, wasm::ValType::I32); + return Error; + } + + if (Name == "memory.fill") { Type = Is64 ? wasm::ValType::I64 : wasm::ValType::I32; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "memory.copy") { + bool Error = popType(ErrorLoc, Type); + Error |= popType(ErrorLoc, wasm::ValType::I32); + Error |= popType(ErrorLoc, Type); + return Error; + } + + if (Name == "memory.copy") { Type = Is64 ? wasm::ValType::I64 : wasm::ValType::I32; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "memory.init") { + bool Error = popType(ErrorLoc, Type); + Error |= popType(ErrorLoc, Type); + Error |= popType(ErrorLoc, Type); + return Error; + } + + if (Name == "memory.init") { Type = Is64 ? 
wasm::ValType::I64 : wasm::ValType::I32; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "drop") { - if (popType(ErrorLoc, {})) - return true; - } else if (Name == "try" || Name == "block" || Name == "loop" || - Name == "if") { - if (Name == "if" && popType(ErrorLoc, wasm::ValType::I32)) - return true; + bool Error = popType(ErrorLoc, wasm::ValType::I32); + Error |= popType(ErrorLoc, wasm::ValType::I32); + Error |= popType(ErrorLoc, Type); + return Error; + } + + if (Name == "drop") { + return popType(ErrorLoc, {}); + } + + if (Name == "try" || Name == "block" || Name == "loop" || Name == "if") { if (Name == "loop") BrStack.emplace_back(LastSig.Params.begin(), LastSig.Params.end()); else BrStack.emplace_back(LastSig.Returns.begin(), LastSig.Returns.end()); - } else if (Name == "end_block" || Name == "end_loop" || Name == "end_if" || - Name == "else" || Name == "end_try" || Name == "catch" || - Name == "catch_all" || Name == "delegate") { - if (checkEnd(ErrorLoc, - Name == "else" || Name == "catch" || Name == "catch_all")) + if (Name == "if" && popType(ErrorLoc, wasm::ValType::I32)) return true; + return false; + } + + if (Name == "end_block" || Name == "end_loop" || Name == "end_if" || + Name == "else" || Name == "end_try" || Name == "catch" || + Name == "catch_all" || Name == "delegate") { + bool Error = checkEnd(ErrorLoc, Name == "else" || Name == "catch" || + Name == "catch_all"); Unreachable = false; if (Name == "catch") { const wasm::WasmSignature *Sig = nullptr; - if (getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), - wasm::WASM_SYMBOL_TYPE_TAG, Sig)) - return true; - // catch instruction pushes values whose types are specified in the tag's - // "params" part - Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end()); + if (!getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), + wasm::WASM_SYMBOL_TYPE_TAG, Sig)) + // catch instruction pushes values whose types are specified in the + // tag's "params" part + Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end()); + else + Error = true; } - } else if (Name == "br") { + return Error; + } + + if (Name == "br") { const MCOperand &Operand = Inst.getOperand(0); if (!Operand.isImm()) - return false; - if (checkBr(ErrorLoc, static_cast(Operand.getImm()))) - return true; - } else if (Name == "return") { - if (endOfFunction(ErrorLoc)) return true; - } else if (Name == "call_indirect" || Name == "return_call_indirect") { + return checkBr(ErrorLoc, static_cast(Operand.getImm())); + } + + if (Name == "return") { + return endOfFunction(ErrorLoc); + } + + if (Name == "call_indirect" || Name == "return_call_indirect") { // Function value. 
- if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (checkSig(ErrorLoc, LastSig)) - return true; + bool Error = popType(ErrorLoc, wasm::ValType::I32); + Error |= checkSig(ErrorLoc, LastSig); if (Name == "return_call_indirect" && endOfFunction(ErrorLoc)) return true; - } else if (Name == "call" || Name == "return_call") { + return Error; + } + + if (Name == "call" || Name == "return_call") { + bool Error = false; const wasm::WasmSignature *Sig = nullptr; - if (getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), - wasm::WASM_SYMBOL_TYPE_FUNCTION, Sig)) - return true; - if (checkSig(ErrorLoc, *Sig)) - return true; + if (!getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), + wasm::WASM_SYMBOL_TYPE_FUNCTION, Sig)) + Error |= checkSig(ErrorLoc, *Sig); + else + Error = true; if (Name == "return_call" && endOfFunction(ErrorLoc)) return true; - } else if (Name == "unreachable") { + return Error; + } + + if (Name == "unreachable") { Unreachable = true; - } else if (Name == "ref.is_null") { - if (popRefType(ErrorLoc)) - return true; + return false; + } + + if (Name == "ref.is_null") { + bool Error = popRefType(ErrorLoc); Stack.push_back(wasm::ValType::I32); - } else if (Name == "throw") { + return Error; + } + + if (Name == "throw") { const wasm::WasmSignature *Sig = nullptr; - if (getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), - wasm::WASM_SYMBOL_TYPE_TAG, Sig)) - return true; - if (checkSig(ErrorLoc, *Sig)) - return true; - } else { - // The current instruction is a stack instruction which doesn't have - // explicit operands that indicate push/pop types, so we get those from - // the register version of the same instruction. - auto RegOpc = WebAssembly::getRegisterOpcode(Opc); - assert(RegOpc != -1 && "Failed to get register version of MC instruction"); - const auto &II = MII.get(RegOpc); - // First pop all the uses off the stack and check them. - for (unsigned I = II.getNumOperands(); I > II.getNumDefs(); I--) { - const auto &Op = II.operands()[I - 1]; - if (Op.OperandType == MCOI::OPERAND_REGISTER) { - auto VT = WebAssembly::regClassToValType(Op.RegClass); - if (popType(ErrorLoc, VT)) - return true; - } - } - // Now push all the defs onto the stack. - for (unsigned I = 0; I < II.getNumDefs(); I++) { - const auto &Op = II.operands()[I]; - assert(Op.OperandType == MCOI::OPERAND_REGISTER && "Register expected"); + if (!getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), + wasm::WASM_SYMBOL_TYPE_TAG, Sig)) + return checkSig(ErrorLoc, *Sig); + return true; + } + + // The current instruction is a stack instruction which doesn't have + // explicit operands that indicate push/pop types, so we get those from + // the register version of the same instruction. + auto RegOpc = WebAssembly::getRegisterOpcode(Opc); + assert(RegOpc != -1 && "Failed to get register version of MC instruction"); + const auto &II = MII.get(RegOpc); + bool Error = false; + // First pop all the uses off the stack and check them. + for (unsigned I = II.getNumOperands(); I > II.getNumDefs(); I--) { + const auto &Op = II.operands()[I - 1]; + if (Op.OperandType == MCOI::OPERAND_REGISTER) { auto VT = WebAssembly::regClassToValType(Op.RegClass); - Stack.push_back(VT); + Error |= popType(ErrorLoc, VT); } } - return false; + // Now push all the defs onto the stack. 
+  for (unsigned I = 0; I < II.getNumDefs(); I++) {
+    const auto &Op = II.operands()[I];
+    assert(Op.OperandType == MCOI::OPERAND_REGISTER && "Register expected");
+    auto VT = WebAssembly::regClassToValType(Op.RegClass);
+    Stack.push_back(VT);
+  }
+  return Error;
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
index 2a654d7982510..972162d3e02f4 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
@@ -33,7 +33,6 @@ class WebAssemblyAsmTypeCheck final {
   SmallVector LocalTypes;
   SmallVector ReturnTypes;
   wasm::WasmSignature LastSig;
-  bool TypeErrorThisFunction = false;
   bool Unreachable = false;
   bool Is64;
@@ -68,7 +67,6 @@ class WebAssemblyAsmTypeCheck final {
     BrStack.clear();
     LocalTypes.clear();
     ReturnTypes.clear();
-    TypeErrorThisFunction = false;
     Unreachable = false;
   }
 };
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7169d588548b0..bab7fe9d25e44 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -498,6 +498,62 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .clampScalar(0, s32, sMaxScalar)
       .widenScalarToNextPow2(1);
 
+  // For G_UITOFP and G_FPTOUI without AVX512, types <= s32 have to be custom
+  // legalized. Otherwise the custom handler has no way to tell whether s32 is
+  // the original type (and must be promoted to s64) or was produced by
+  // widening (and must not be widened to s64 again).
+  //
+  // For AVX512 we simply widen the types, as there is a direct mapping from
+  // the opcodes to asm instructions.
+  getActionDefinitionsBuilder(G_UITOFP)
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+               typeInSet(1, {s32, s64})(Query);
+      })
+      .customIf([=](const LegalityQuery &Query) {
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(0, s32)(Query)) ||
+                (HasSSE2 && typeIs(0, s64)(Query))) &&
+               scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
+      })
+      .lowerIf([=](const LegalityQuery &Query) {
+        // Lower conversions from s64
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(0, s32)(Query)) ||
+                (HasSSE2 && typeIs(0, s64)(Query))) &&
+               (Is64Bit && typeIs(1, s64)(Query));
+      })
+      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+      .widenScalarToNextPow2(0)
+      .clampScalar(1, s32, sMaxScalar)
+      .widenScalarToNextPow2(1);
+
+  getActionDefinitionsBuilder(G_FPTOUI)
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+               typeInSet(1, {s32, s64})(Query);
+      })
+      .customIf([=](const LegalityQuery &Query) {
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(1, s32)(Query)) ||
+                (HasSSE2 && typeIs(1, s64)(Query))) &&
+               scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
+      })
+      // TODO: replace with customized legalization using
+      // specifics of cvttsd2si. The selection of this node requires
+      // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
+      // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
+      .lowerIf([=](const LegalityQuery &Query) {
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(1, s32)(Query)) ||
+                (HasSSE2 && typeIs(1, s64)(Query))) &&
+               (Is64Bit && typeIs(0, s64)(Query));
+      })
+      .clampScalar(0, s32, sMaxScalar)
+      .widenScalarToNextPow2(0)
+      .clampScalar(1, s32, HasSSE2 ?
s64 : s32) + .widenScalarToNextPow2(1); + // vector ops getActionDefinitionsBuilder(G_BUILD_VECTOR) .customIf([=](const LegalityQuery &Query) { @@ -590,6 +646,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return false; case TargetOpcode::G_BUILD_VECTOR: return legalizeBuildVector(MI, MRI, Helper); + case TargetOpcode::G_FPTOUI: + return legalizeFPTOUI(MI, MRI, Helper); + case TargetOpcode::G_UITOFP: + return legalizeUITOFP(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); } @@ -645,6 +705,45 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI, return true; } +bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); + unsigned DstSizeInBits = DstTy.getScalarSizeInBits(); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // Simply reuse FPTOSI when it is possible to widen the type + if (DstSizeInBits <= 32) { + auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src); + MIRBuilder.buildTrunc(Dst, Casted); + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // Simply reuse SITOFP when it is possible to widen the type + if (SrcTy.getSizeInBits() <= 32) { + auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src); + MIRBuilder.buildSITOFP(Dst, Ext); + MI.eraseFromParent(); + return true; + } + + return false; +} + bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 229a58986903d..39bd9892e2f16 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo { private: bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + + bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; + + bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp index 61633a09d93cf..43c0145ec8e2a 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp @@ -296,7 +296,9 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx); break; case TargetOpcode::G_SITOFP: - case TargetOpcode::G_FPTOSI: { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_FPTOUI: { // Some of the floating-point instructions have mixed GPR and FP // operands: fine-tune the computed mapping. 
auto &Op0 = MI.getOperand(0); @@ -304,10 +306,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const LLT Ty0 = MRI.getType(Op0.getReg()); const LLT Ty1 = MRI.getType(Op1.getReg()); - bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP; - bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI; + bool FirstArgIsFP = + Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP; OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP); - OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP); + OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP); break; } case TargetOpcode::G_FCMP: { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d7a26dc4caec6..d9eedfdfd53a4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -52983,10 +52983,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // combiner. static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - // First instruction should be a right shift of a multiply. - if (Src.getOpcode() != ISD::SRL || - Src.getOperand(0).getOpcode() != ISD::MUL) - return SDValue(); + using namespace llvm::SDPatternMatch; if (!Subtarget.hasSSE2()) return SDValue(); @@ -53001,15 +52998,12 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, if (InVT.getVectorElementType().getSizeInBits() < 32) return SDValue(); - // Need a shift by 16. - APInt ShiftAmt; - if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) || - ShiftAmt != 16) + // First instruction should be a right shift by 16 of a multiply. + SDValue LHS, RHS; + if (!sd_match(Src, + m_Srl(m_Mul(m_Value(LHS), m_Value(RHS)), m_SpecificInt(16)))) return SDValue(); - SDValue LHS = Src.getOperand(0).getOperand(0); - SDValue RHS = Src.getOperand(0).getOperand(1); - // Count leading sign/zero bits on both inputs - if there are enough then // truncation back to vXi16 will be cheap - either as a pack/shuffle // sequence or using AVX512 truncations. If the inputs are sext/zext then the @@ -53027,12 +53021,13 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, return SDValue(); // Check if both inputs are extensions, which will be removed by truncation. 
- bool IsTruncateFree = (LHS.getOpcode() == ISD::SIGN_EXTEND || - LHS.getOpcode() == ISD::ZERO_EXTEND) && - (RHS.getOpcode() == ISD::SIGN_EXTEND || - RHS.getOpcode() == ISD::ZERO_EXTEND) && - LHS.getOperand(0).getScalarValueSizeInBits() <= 16 && - RHS.getOperand(0).getScalarValueSizeInBits() <= 16; + auto isOpTruncateFree = [](SDValue Op) { + if (Op.getOpcode() == ISD::SIGN_EXTEND || + Op.getOpcode() == ISD::ZERO_EXTEND) + return Op.getOperand(0).getScalarValueSizeInBits() <= 16; + return ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }; + bool IsTruncateFree = isOpTruncateFree(LHS) && isOpTruncateFree(RHS); // For AVX2+ targets, with the upper bits known zero, we can perform MULHU on // the (bitcasted) inputs directly, and then cheaply pack/truncate the result diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0fa138cefc3b8..46bc73c5e928e 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3157,15 +3157,14 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } -InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost X86TTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { // Early out if this type isn't scalar/vector integer/float. if (!(ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Legalize the type. std::pair LT = getTypeLegalizationCost(ValTy); @@ -3229,9 +3228,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // Use FCMP_UEQ expansion - FCMP_ONE should be the same. 
if (CondTy && !ST->hasAVX()) return getCmpSelInstrCost(Opcode, ValTy, CondTy, - CmpInst::Predicate::FCMP_UNO, CostKind) + + CmpInst::Predicate::FCMP_UNO, CostKind, + Op1Info, Op2Info) + getCmpSelInstrCost(Opcode, ValTy, CondTy, - CmpInst::Predicate::FCMP_OEQ, CostKind) + + CmpInst::Predicate::FCMP_OEQ, CostKind, + Op1Info, Op2Info) + getArithmeticInstrCost(Instruction::Or, CondTy, CostKind); break; @@ -3451,7 +3452,8 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ValTy->getScalarType()->isFloatingPointTy()) return 3; - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 8ea67dcbe5166..c16461b157e07 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -156,10 +156,12 @@ class X86TTIImpl : public BasicTTIImplBase { TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp index 3f99387f759d9..db86637ecf83f 100644 --- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp +++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "XtensaAsmPrinter.h" +#include "MCTargetDesc/XtensaInstPrinter.h" #include "MCTargetDesc/XtensaMCExpr.h" #include "MCTargetDesc/XtensaTargetStreamer.h" #include "TargetInfo/XtensaTargetInfo.h" @@ -157,6 +158,57 @@ void XtensaAsmPrinter::emitConstantPool() { OutStreamer->popSection(); } +void XtensaAsmPrinter::printOperand(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + case MachineOperand::MO_Immediate: { + MCOperand MC = lowerOperand(MI->getOperand(OpNo)); + XtensaInstPrinter::printOperand(MC, O); + break; + } + default: + llvm_unreachable("unknown operand type"); + } +} + +bool XtensaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Print the operand if there is no operand modifier. + if (!ExtraCode || !ExtraCode[0]) { + printOperand(MI, OpNo, O); + return false; + } + + // Fallback to the default implementation. + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); +} + +bool XtensaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. 
+ + assert(OpNo + 1 < MI->getNumOperands() && "Insufficient operands"); + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo + 1); + + assert(Base.isReg() && + "Unexpected base pointer for inline asm memory operand."); + assert(Offset.isImm() && "Unexpected offset for inline asm memory operand."); + + OS << XtensaInstPrinter::getRegisterName(Base.getReg()); + OS << ", "; + OS << Offset.getImm(); + + return false; +} + MCSymbol * XtensaAsmPrinter::GetConstantPoolIndexSymbol(const MachineOperand &MO) const { // Create a symbol for the name. diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h index f9cf5ae8c9f65..1137309cd9a45 100644 --- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h +++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h @@ -42,6 +42,14 @@ class LLVM_LIBRARY_VISIBILITY XtensaAsmPrinter : public AsmPrinter { void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 6f6d3342fcd7f..af1110487b427 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -33,6 +33,10 @@ class XtensaDAGToDAGISel : public SelectionDAGISel { void Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + InlineAsm::ConstraintCode ConstraintID, + std::vector &OutOps) override; + // For load/store instructions generate (base+offset) pair from // memory address. The offset must be a multiple of scale argument. 
bool selectMemRegAddr(SDValue Addr, SDValue &Base, SDValue &Offset, @@ -212,3 +216,22 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } + +bool XtensaDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, + std::vector &OutOps) { + switch (ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::ConstraintCode::m: { + SDValue Base, Offset; + + selectMemRegAddr(Op, Base, Offset, 4); + OutOps.push_back(Base); + OutOps.push_back(Offset); + + return false; + } + } + return false; +} diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index bc1360e212307..670930e99334f 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -142,6 +142,74 @@ bool XtensaTargetLowering::isOffsetFoldingLegal( return false; } +//===----------------------------------------------------------------------===// +// Inline asm support +//===----------------------------------------------------------------------===// +TargetLowering::ConstraintType +XtensaTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return C_RegisterClass; + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +XtensaTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &Info, const char *Constraint) const { + ConstraintWeight Weight = CW_Invalid; + Value *CallOperandVal = Info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + + Type *Ty = CallOperandVal->getType(); + + // Look at the constraint type. + switch (*Constraint) { + default: + Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint); + break; + case 'r': + if (Ty->isIntegerTy()) + Weight = CW_Register; + break; + } + return Weight; +} + +std::pair +XtensaTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: + break; + case 'r': // General-purpose register + return std::make_pair(0U, &Xtensa::ARRegClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +void XtensaTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, StringRef Constraint, std::vector &Ops, + SelectionDAG &DAG) const { + SDLoc DL(Op); + + // Only support length 1 constraints for now. 
+ if (Constraint.size() > 1) + return; + + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + //===----------------------------------------------------------------------===// // Calling conventions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index 2a878e45047d2..f1cd00c41437a 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -76,6 +76,21 @@ class XtensaTargetLowering : public TargetLowering { const char *getTargetNodeName(unsigned Opcode) const override; + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + TargetLowering::ConstraintType + getConstraintType(StringRef Constraint) const override; + + TargetLowering::ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const override; + + void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index b2c4f9ee00293..616e4eda1dd29 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2040,8 +2040,7 @@ const StringMap sys::getHostCPUFeatures() { Features["zvfhmin"] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN Features["zfa"] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA Features["ztso"] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO - // TODO: Re-enable zacas when it is marked non-experimental again. - // Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS + Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS Features["zicond"] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND Features["zihintpause"] = ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 2b34d3b5a56ea..c997b180937af 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" @@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) { ++NumConversions; } -static bool eliminateAvailableExternally(Module &M) { +static bool eliminateAvailableExternally(Module &M, bool Convert) { bool Changed = false; // Drop initializers of available externally global variables. 
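As a usage illustration for the Xtensa inline-asm hunks above: with the 'r' constraint mapped to the AR register class and an 'm' operand printed as "base, offset" by PrintAsmMemoryOperand, source like the following becomes selectable. The snippet is hypothetical; the mnemonic and operand order are assumed from the l32i base-plus-offset form:

  // Hypothetical C/C++ user of the new constraints; %1 expands to "aN, imm",
  // so the final text is e.g. "l32i a2, a3, 0".
  int load_word(const int *p) {
    int v;
    asm("l32i %0, %1" : "=r"(v) : "m"(*p));
    return v;
  }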
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
     if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
       continue;
 
-    if (ConvertToLocal)
+    if (Convert || ConvertToLocal)
       convertToLocalCopy(M, F);
     else
       deleteFunction(F);
@@ -125,8 +126,15 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult<CtxProfAnalysis>(M);
+  // Convert to local instead of eliding if we use contextual profiling in this
+  // module. This is because the IPO decisions performed with contextual
+  // information will likely differ from decisions made without. For a function
+  // that's imported, its optimizations will thus differ, and be specialized
+  // for this contextual information. Eliding it in favor of the original would
+  // undo these optimizations.
+  if (!eliminateAvailableExternally(M, /*Convert=*/(CtxProf && !!(*CtxProf))))
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 6927fe538e367..576a31f8b86ae 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1362,12 +1362,22 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     }
   }
 
+#ifndef NDEBUG
   // Find the node for the last stack id, which should be the same
   // across all calls recorded for this id, and is this node's id.
   uint64_t LastId = Node->OrigStackOrAllocId;
   ContextNode *LastNode = getNodeForStackId(LastId);
   // We should only have kept stack ids that had nodes.
   assert(LastNode);
+  assert(LastNode == Node);
+#else
+  ContextNode *LastNode = Node;
+#endif
+
+  // Compute the last node's context ids once, as it is shared by all calls in
+  // this entry.
+  DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
+  assert(!LastNodeContextIds.empty());
 
   for (unsigned I = 0; I < Calls.size(); I++) {
     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
@@ -1389,40 +1399,43 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
 
     assert(LastId == Ids.back());
 
-    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
-    assert(FirstNode);
-
     // Recompute the context ids for this stack id sequence (the
     // intersection of the context ids of the corresponding nodes).
     // Start with the ids we saved in the map for this call, which could be
     // duplicated context ids. We have to recompute as we might have overlap
     // between the saved context ids for different last nodes, and removed
     // them already during the post order traversal.
-    set_intersect(SavedContextIds, FirstNode->getContextIds());
-    ContextNode *PrevNode = nullptr;
-    for (auto Id : Ids) {
+    set_intersect(SavedContextIds, LastNodeContextIds);
+    ContextNode *PrevNode = LastNode;
+    bool Skip = false;
+    // Iterate backwards through the stack Ids, starting after the last Id
+    // in the list, which was handled once outside for all Calls.
+    for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
+      auto Id = *IdIter;
       ContextNode *CurNode = getNodeForStackId(Id);
       // We should only have kept stack ids that had nodes and weren't
       // recursive.
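The ElimAvailExtern change above keys its behavior off a *cached* analysis, so modules that never ran contextual profiling keep the old eliding behavior. A minimal sketch of that pattern, using only the pass-manager API visible in the hunk (the pass name is hypothetical):

```cpp
#include "llvm/Analysis/CtxProfAnalysis.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct CtxProfGatedPass : PassInfoMixin<CtxProfGatedPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) {
    // Null unless some earlier pass computed and cached CtxProfAnalysis.
    auto *CtxProf = MAM.getCachedResult<CtxProfAnalysis>(M);
    bool UseCtxProf = CtxProf && !!(*CtxProf); // also checks result validity
    return UseCtxProf ? PreservedAnalyses::none() : PreservedAnalyses::all();
  }
};
```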
assert(CurNode); assert(!CurNode->Recursive); - if (!PrevNode) { - PrevNode = CurNode; - continue; - } - auto *Edge = CurNode->findEdgeFromCallee(PrevNode); + + auto *Edge = CurNode->findEdgeFromCaller(PrevNode); if (!Edge) { - SavedContextIds.clear(); + Skip = true; break; } PrevNode = CurNode; + + // Update the context ids, which is the intersection of the ids along + // all edges in the sequence. set_intersect(SavedContextIds, Edge->getContextIds()); // If we now have no context ids for clone, skip this call. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + Skip = true; break; + } } - if (SavedContextIds.empty()) + if (Skip) continue; // Create new context node. @@ -1433,6 +1446,9 @@ void CallsiteContextGraph:: NonAllocationCallToContextNodeMap[Call] = NewNode; NewNode->AllocTypes = computeAllocType(SavedContextIds); + ContextNode *FirstNode = getNodeForStackId(Ids[0]); + assert(FirstNode); + // Connect to callees of innermost stack frame in inlined call chain. // This updates context ids for FirstNode's callee's to reflect those // moved to NewNode. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 80d3adedfc89f..e8c0b00661654 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -181,11 +181,13 @@ static unsigned conjugateICmpMask(unsigned Mask) { // Adapts the external decomposeBitTestICmp for local use. static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, Value *&Y, Value *&Z) { - APInt Mask; - if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask)) + auto Res = llvm::decomposeBitTestICmp(LHS, RHS, Pred); + if (!Res) return false; - Y = ConstantInt::get(X->getType(), Mask); + Pred = Res->Pred; + X = Res->X; + Y = ConstantInt::get(X->getType(), Res->Mask); Z = ConstantInt::get(X->getType(), 0); return true; } @@ -870,11 +872,15 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, APInt &UnsetBitsMask) -> bool { CmpInst::Predicate Pred = ICmp->getPredicate(); // Can it be decomposed into icmp eq (X & Mask), 0 ? - if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), - Pred, X, UnsetBitsMask, - /*LookThroughTrunc=*/false) && - Pred == ICmpInst::ICMP_EQ) + auto Res = + llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), + Pred, /*LookThroughTrunc=*/false); + if (Res && Res->Pred == ICmpInst::ICMP_EQ) { + X = Res->X; + UnsetBitsMask = Res->Mask; return true; + } + // Is it icmp eq (X & Mask), 0 already? const APInt *Mask; if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) && diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 698abbb34c18c..b1215bb4d83b0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5905,11 +5905,10 @@ Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) { // This matches patterns corresponding to tests of the signbit as well as: // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?) 
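All of the InstCombine callers above migrate to the optional-returning form of decomposeBitTestICmp; judging from these call sites, it now returns a small struct with X, Pred, and Mask members instead of writing through reference out-parameters, so a failed match can no longer leave callers' state half-updated. A sketch of a consumer under that assumption:

```cpp
#include "llvm/Analysis/CmpInstAnalysis.h"
#include <optional>
using namespace llvm;

// Returns the mask M if `LHS pred RHS` is equivalent to (X & M) ==/!= 0;
// std::nullopt when no bit-test pattern is recognized. Hypothetical helper,
// signature inferred from the call sites above.
static std::optional<APInt> bitTestMask(Value *LHS, Value *RHS,
                                        CmpInst::Predicate Pred) {
  if (auto Res = decomposeBitTestICmp(LHS, RHS, Pred))
    return Res->Mask; // Res->X and Res->Pred are valid exactly when Res is
  return std::nullopt;
}
```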
- APInt Mask; - if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) { - Value *And = Builder.CreateAnd(X, Mask); - Constant *Zero = ConstantInt::getNullValue(X->getType()); - return new ICmpInst(Pred, And, Zero); + if (auto Res = decomposeBitTestICmp(Op0, Op1, Pred, /*WithTrunc=*/true)) { + Value *And = Builder.CreateAnd(Res->X, Res->Mask); + Constant *Zero = ConstantInt::getNullValue(Res->X->getType()); + return new ICmpInst(Res->Pred, And, Zero); } unsigned SrcBits = X->getType()->getScalarSizeInBits(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7476db9ee38f4..3dbe95897d635 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -145,12 +145,15 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, return nullptr; AndMask = *AndRHS; - } else if (decomposeBitTestICmp(Cmp->getOperand(0), Cmp->getOperand(1), - Pred, V, AndMask)) { - assert(ICmpInst::isEquality(Pred) && "Not equality test?"); - if (!AndMask.isPowerOf2()) + } else if (auto Res = decomposeBitTestICmp(Cmp->getOperand(0), + Cmp->getOperand(1), Pred)) { + assert(ICmpInst::isEquality(Res->Pred) && "Not equality test?"); + if (!Res->Mask.isPowerOf2()) return nullptr; + V = Res->X; + AndMask = Res->Mask; + Pred = Res->Pred; CreateAnd = true; } else { return nullptr; @@ -747,12 +750,13 @@ static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal, C1Log = C1->logBase2(); } else { - APInt C1; - if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CmpLHS, C1) || - !C1.isPowerOf2()) + auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred); + if (!Res || !Res->Mask.isPowerOf2()) return nullptr; - C1Log = C1.logBase2(); + CmpLHS = Res->X; + Pred = Res->Pred; + C1Log = Res->Mask.logBase2(); NeedAnd = true; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 9c4d206692fac..c66db9285c799 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -858,11 +858,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, } case Instruction::SRem: { const APInt *Rem; - if (match(I->getOperand(1), m_APInt(Rem))) { - // X % -1 demands all the bits because we don't want to introduce - // INT_MIN % -1 (== undef) by accident. - if (Rem->isAllOnes()) - break; + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. 
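The SRem restructuring below folds the all-ones guard into the match condition without changing behavior. The underlying facts are easy to check standalone (plain C++, no LLVM dependencies):

```cpp
// For a power-of-two divisor RA, srem leaves the low bits of a non-negative
// X untouched, so a DemandedMask below RA can look through the srem. An
// all-ones divisor is excluded up front: otherwise simplifying X could
// materialize INT_MIN % -1, whose quotient overflows (undefined behavior).
#include <cassert>

int main() {
  for (int X = 0; X < 1 << 12; ++X)
    assert((X % 8) == (X & 7)); // low 3 bits unaffected by srem
  return 0;
}
```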
+    if (match(I->getOperand(1), m_APInt(Rem)) && !Rem->isAllOnes()) {
       APInt RA = Rem->abs();
       if (RA.isPowerOf2()) {
         if (DemandedMask.ult(RA)) // srem won't affect demanded bits
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 49d463a07553f..669b63343e994 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -187,6 +187,11 @@ static cl::opt<OffsetKind> ClMappingOffsetDynamic(
         clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"),
         clEnumValN(OffsetKind::kTls, "tls", "Use TLS")));
 
+static cl::opt<bool>
+    ClFrameRecords("hwasan-with-frame-record",
+                   cl::desc("Use ring buffer for stack allocations"),
+                   cl::Hidden);
+
 static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
                                           cl::desc("Hot percentile cutoff."));
 
@@ -1932,14 +1937,18 @@ void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
     // Fuchsia is always PIE, which means that the beginning of the address
     // space is always available.
     SetFixed(0);
-  } else if (ClMappingOffset.getNumOccurrences() > 0) {
-    SetFixed(ClMappingOffset);
-    WithFrameRecord = false;
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
     SetFixed(0);
     WithFrameRecord = false;
-  } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) {
-    Kind = ClMappingOffsetDynamic;
-    WithFrameRecord = isInTls();
+  }
+
+  WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord);
+
+  // Apply the last of ClMappingOffset and ClMappingOffsetDynamic.
+  Kind = optOr(ClMappingOffsetDynamic, Kind);
+  if (ClMappingOffset.getNumOccurrences() > 0 &&
+      !(ClMappingOffsetDynamic.getNumOccurrences() > 0 &&
+        ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) {
+    SetFixed(ClMappingOffset);
   }
 }
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index ef9c264482a64..0e2b5c925a6a7 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -194,7 +194,6 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
   SelectInst *SI = SIToUnfold.getInst();
   PHINode *SIUse = SIToUnfold.getUse();
   BasicBlock *StartBlock = SI->getParent();
-  BasicBlock *EndBlock = SIUse->getParent();
 
   BranchInst *StartBlockTerm =
       dyn_cast<BranchInst>(StartBlock->getTerminator());
@@ -202,6 +201,7 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
   assert(SI->hasOneUse());
 
   if (StartBlockTerm->isUnconditional()) {
+    BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
     // Arbitrarily choose the 'false' side for a new input value to the PHI.
     BasicBlock *NewBlock = BasicBlock::Create(
         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
@@ -223,32 +223,44 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
                                       NewBlock->getFirstInsertionPt());
     NewPhi->addIncoming(SIOp2, StartBlock);
 
-    if (auto *OpSi = dyn_cast<SelectInst>(SIOp1))
-      NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse));
-    if (auto *OpSi = dyn_cast<SelectInst>(SIOp2))
-      NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi));
-
-    // Update the phi node of SI.
-    for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {
-      if (SIUse->getIncomingBlock(Idx) == StartBlock)
-        SIUse->setIncomingValue(Idx, SIOp1);
+    // Update any other PHI nodes in EndBlock.
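The HWASan mapping-selection rework makes the two offset flags order-sensitive: the dynamic kind is applied first via optOr (the helper referenced above, which reads a cl::opt only if it actually occurred), then a fixed -hwasan-mapping-offset wins only when it appeared later on the command line than -hwasan-mapping-offset-dynamic, per cl::opt's getPosition(). A minimal standalone sketch of that predicate (names are hypothetical):

```cpp
// Sketch: "last one wins" between a fixed and a dynamic setting. OccA/PosA
// describe the fixed flag, OccB/PosB the dynamic one; positions are the
// argv indices that cl::opt::getPosition() reports.
static bool fixedWins(unsigned OccA, unsigned PosA, unsigned OccB,
                      unsigned PosB) {
  return OccA > 0 && !(OccB > 0 && PosB > PosA);
}
```

So passing the dynamic flag first and the fixed offset second ends up fixed, while the reverse order stays dynamic.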
+ for (PHINode &Phi : EndBlock->phis()) { + if (SIUse == &Phi) + continue; + Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock); } - SIUse->addIncoming(NewPhi, NewBlock); - // Update any other PHI nodes in EndBlock. - for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast(II); - ++II) { - if (Phi != SIUse) - Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); + // Update the phi node of SI, which is its only use. + if (EndBlock == SIUse->getParent()) { + SIUse->addIncoming(NewPhi, NewBlock); + SIUse->replaceUsesOfWith(SI, SIOp1); + } else { + PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock), + Twine(SI->getName(), ".si.unfold.phi"), + EndBlock->getFirstInsertionPt()); + for (BasicBlock *Pred : predecessors(EndBlock)) { + if (Pred != StartBlock && Pred != NewBlock) + EndPhi->addIncoming(EndPhi, Pred); + } + + EndPhi->addIncoming(SIOp1, StartBlock); + EndPhi->addIncoming(NewPhi, NewBlock); + SIUse->replaceUsesOfWith(SI, EndPhi); + SIUse = EndPhi; } - StartBlockTerm->eraseFromParent(); + if (auto *OpSi = dyn_cast(SIOp1)) + NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); + if (auto *OpSi = dyn_cast(SIOp2)) + NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); // Insert the real conditional branch based on the original condition. + StartBlockTerm->eraseFromParent(); BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, {DominatorTree::Insert, StartBlock, NewBlock}}); } else { + BasicBlock *EndBlock = SIUse->getParent(); BasicBlock *NewBlockT = BasicBlock::Create( SI->getContext(), Twine(SI->getName(), ".si.unfold.true"), EndBlock->getParent(), EndBlock); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 578d087e470e1..740e1e39b9ee7 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -2464,11 +2464,16 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, (BitPos = ConstantExpr::getExactLogBase2(cast(BitMask))); }; auto MatchDecomposableConstantBitMask = [&]() { - APInt Mask; - return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && - ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && - (BitMask = ConstantInt::get(CurrX->getType(), Mask)) && - (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2())); + auto Res = llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred); + if (Res && Res->Mask.isPowerOf2()) { + assert(ICmpInst::isEquality(Res->Pred)); + Pred = Res->Pred; + CurrX = Res->X; + BitMask = ConstantInt::get(CurrX->getType(), Res->Mask); + BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2()); + return true; + } + return false; }; if (!MatchVariableBitMask() && !MatchConstantBitMask() && diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index aca8225cebb3f..92e47cbc7ae8b 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" @@ -85,7 +86,43 @@ using PhiMap = MapVector; using BB2BBVecMap = MapVector; using BBPhiMap = DenseMap; -using BBPredicates = DenseMap; + +using MaybeCondBranchWeights = std::optional; 
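For orientation, the select-unfolding rewrite above targets source patterns like the following (illustrative C++, not from the patch); the new else-branch handles the case where the select no longer sits in a direct predecessor of the PHI's block, threading the value through the new `.si.unfold.phi` node instead of patching SIUse's incoming list in place:

```cpp
// A DFA-style loop whose state update flows through a select; DFA jump
// threading first unfolds `Next` into explicit control flow, then threads
// the switch over the statically known states.
int run(int N) {
  int State = 0;
  for (int I = 0; I < N; ++I) {
    int Next = (I & 1) ? 1 : 2; // the select to unfold
    switch (State) {
    case 0: State = Next; break;
    case 1: State = 2; break;
    default: State = 0; break;
    }
  }
  return State;
}
```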
+ +class CondBranchWeights { + uint32_t TrueWeight; + uint32_t FalseWeight; + + CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {} + +public: + static MaybeCondBranchWeights tryParse(const BranchInst &Br) { + assert(Br.isConditional()); + + uint64_t T, F; + if (!extractBranchWeights(Br, T, F)) + return std::nullopt; + + return CondBranchWeights(T, F); + } + + static void setMetadata(BranchInst &Br, + const MaybeCondBranchWeights &Weights) { + assert(Br.isConditional()); + if (!Weights) + return; + uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight}; + setBranchWeights(Br, Arr, false); + } + + CondBranchWeights invert() const { + return CondBranchWeights{FalseWeight, TrueWeight}; + } +}; + +using ValueWeightPair = std::pair; + +using BBPredicates = DenseMap; using PredMap = DenseMap; using BB2BBMap = DenseMap; @@ -271,7 +308,7 @@ class StructurizeCFG { void analyzeLoops(RegionNode *N); - Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); + ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert); void gatherPredicates(RegionNode *N); @@ -449,16 +486,22 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { } /// Build the condition for one edge -Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, - bool Invert) { +ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, + bool Invert) { Value *Cond = Invert ? BoolFalse : BoolTrue; + MaybeCondBranchWeights Weights; + if (Term->isConditional()) { Cond = Term->getCondition(); + Weights = CondBranchWeights::tryParse(*Term); - if (Idx != (unsigned)Invert) + if (Idx != (unsigned)Invert) { Cond = invertCondition(Cond); + if (Weights) + Weights = Weights->invert(); + } } - return Cond; + return {Cond, Weights}; } /// Analyze the predecessors of each block and build up predicates @@ -490,8 +533,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(P)) { - Pred[Other] = BoolFalse; - Pred[P] = BoolTrue; + Pred[Other] = {BoolFalse, std::nullopt}; + Pred[P] = {BoolTrue, std::nullopt}; continue; } } @@ -512,9 +555,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { BasicBlock *Entry = R->getEntry(); if (Visited.count(Entry)) - Pred[Entry] = BoolTrue; + Pred[Entry] = {BoolTrue, std::nullopt}; else - LPred[Entry] = BoolFalse; + LPred[Entry] = {BoolFalse, std::nullopt}; } } } @@ -578,12 +621,14 @@ void StructurizeCFG::insertConditions(bool Loops) { Dominator.addBlock(Parent); Value *ParentValue = nullptr; - for (std::pair BBAndPred : Preds) { + MaybeCondBranchWeights ParentWeights = std::nullopt; + for (std::pair BBAndPred : Preds) { BasicBlock *BB = BBAndPred.first; - Value *Pred = BBAndPred.second; + auto [Pred, Weight] = BBAndPred.second; if (BB == Parent) { ParentValue = Pred; + ParentWeights = Weight; break; } PhiInserter.AddAvailableValue(BB, Pred); @@ -592,6 +637,7 @@ void StructurizeCFG::insertConditions(bool Loops) { if (ParentValue) { Term->setCondition(ParentValue); + CondBranchWeights::setMetadata(*Term, ParentWeights); } else { if (!Dominator.resultIsRememberedBlock()) PhiInserter.AddAvailableValue(Dominator.result(), Default); @@ -607,7 +653,7 @@ void StructurizeCFG::simplifyConditions() { for (auto &I : concat(Predicates, LoopPreds)) { auto &Preds = I.second; for (auto &J : Preds) { - auto &Cond = J.second; + Value *Cond = J.second.first; Instruction *Inverted; if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 
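CondBranchWeights keeps !prof data alive through structurization: weights are parsed off the original conditional branch, swapped whenever the condition is inverted, and reattached in insertConditions. The invariant is easy to state concretely; this sketch uses the same ProfDataUtils entry points the patch itself calls:

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ProfDataUtils.h"
#include <cassert>
using namespace llvm;

// A branch annotated with weights {1000, 1} whose condition gets inverted
// must carry {1, 1000} afterwards, or the hot/cold sense of its edges flips.
static void invertProfWeights(BranchInst &Br) {
  assert(Br.isConditional());
  uint64_t T, F;
  if (!extractBranchWeights(Br, T, F))
    return; // no metadata to maintain
  uint32_t Swapped[] = {static_cast<uint32_t>(F), static_cast<uint32_t>(T)};
  setBranchWeights(Br, Swapped, /*IsExpected=*/false);
}
```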
!Cond->use_empty()) { @@ -904,9 +950,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) { /// Does BB dominate all the predicates of Node? bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { BBPredicates &Preds = Predicates[Node->getEntry()]; - return llvm::all_of(Preds, [&](std::pair Pred) { - return DT->dominates(BB, Pred.first); - }); + return llvm::all_of(Preds, + [&](std::pair Pred) { + return DT->dominates(BB, Pred.first); + }); } /// Can we predict that this node will always be called? @@ -918,9 +965,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { if (!PrevNode) return true; - for (std::pair Pred : Preds) { + for (std::pair Pred : Preds) { BasicBlock *BB = Pred.first; - Value *V = Pred.second; + Value *V = Pred.second.first; if (V != BoolTrue) return false; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 725b512fb86e7..7659fc6919615 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -112,6 +112,12 @@ static cl::opt PHICSENumPHISmallSize( "When the basic block contains not more than this number of PHI nodes, " "perform a (faster!) exhaustive search instead of set-driven one.")); +static cl::opt MaxPhiEntriesIncreaseAfterRemovingEmptyBlock( + "max-phi-entries-increase-after-removing-empty-block", cl::init(1000), + cl::Hidden, + cl::desc("Stop removing an empty block if removing it will introduce more " + "than this number of phi entries in its successor")); + // Max recursion depth for collectBitParts used when detecting bswap and // bitreverse idioms. static const unsigned BitPartRecursionMaxDepth = 48; @@ -1047,6 +1053,33 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, return true; } +/// Check whether removing \p BB will make the phis in its \p Succ have too +/// many incoming entries. This function does not check whether \p BB is +/// foldable or not. +static bool introduceTooManyPhiEntries(BasicBlock *BB, BasicBlock *Succ) { + // If BB only has one predecessor, then removing it will not introduce more + // incoming edges for phis. + if (BB->hasNPredecessors(1)) + return false; + unsigned NumPreds = pred_size(BB); + unsigned NumChangedPhi = 0; + for (auto &Phi : Succ->phis()) { + // If the incoming value is a phi and the phi is defined in BB, + // then removing BB will not increase the total phi entries of the ir. + if (auto *IncomingPhi = dyn_cast(Phi.getIncomingValueForBlock(BB))) + if (IncomingPhi->getParent() == BB) + continue; + // Otherwise, we need to add entries to the phi + NumChangedPhi++; + } + // For every phi that needs to be changed, (NumPreds - 1) new entries will be + // added. If the total increase in phi entries exceeds + // MaxPhiEntriesIncreaseAfterRemovingEmptyBlock, it will be considered as + // introducing too many new phi entries. + return (NumPreds - 1) * NumChangedPhi > + MaxPhiEntriesIncreaseAfterRemovingEmptyBlock; +} + /// Replace a value flowing from a block to a phi with /// potentially multiple instances of that value flowing from the /// block's predecessors to the phi. 
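introduceTooManyPhiEntries prices block removal as (NumPreds - 1) new entries per PHI that actually needs rewiring; PHIs whose incoming value is itself a PHI defined in BB are free because their entries are merged rather than duplicated. The arithmetic at the default cap, checked standalone:

```cpp
// Folding a block with 100 predecessors into a successor where 20 PHIs need
// new entries adds (100 - 1) * 20 = 1980 incoming entries, which exceeds the
// default cap of 1000, so the empty block is kept.
#include <cassert>

int main() {
  unsigned NumPreds = 100, NumChangedPhi = 20, Cap = 1000;
  assert((NumPreds - 1) * NumChangedPhi > Cap);
  return 0;
}
```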
@@ -1146,7 +1179,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, BBKillable || CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred); - if (!BBKillable && !BBPhisMergeable) + if ((!BBKillable && !BBPhisMergeable) || introduceTooManyPhiEntries(BB, Succ)) return false; // Check to see if merging these blocks/phis would cause conflicts for any of diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 034fdf4233de3..00eec0a6f7b14 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -220,6 +220,11 @@ class VPBuilder { new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name)); } + VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL, + const Twine &Name = "") { + return createInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, DL, Name); + } + VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0566d80c1cc00..db4631e19c11d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -505,8 +505,7 @@ class InnerLoopVectorizer { /// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p /// Instr's operands. void scalarizeInstruction(const Instruction *Instr, - VPReplicateRecipe *RepRecipe, - const VPIteration &Instance, + VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State); /// Fix the non-induction PHIs in \p Plan. @@ -538,17 +537,11 @@ class InnerLoopVectorizer { /// A small list of PHINodes. using PhiVector = SmallVector; - /// A type for scalarized values in the new loop. Each value from the - /// original loop, when scalarized, is represented by UF x VF scalar values - /// in the new unrolled loop, where UF is the unroll factor and VF is the - /// vectorization factor. - using ScalarParts = SmallVector, 2>; - /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, BasicBlock *VectorHeader, - VPlan &Plan, VPTransformState &State); + BasicBlock *MiddleBlock, VPlan &Plan, + VPTransformState &State); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. @@ -2328,14 +2321,14 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) { void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, - const VPIteration &Instance, + const VPLane &Lane, VPTransformState &State) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for // the first lane and part. if (isa(Instr)) - if (!Instance.isFirstIteration()) + if (!Lane.isFirstLane()) return; // Does this instruction return a value ? @@ -2360,18 +2353,18 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. 
for (const auto &I : enumerate(RepRecipe->operands())) { - auto InputInstance = Instance; + auto InputLane = Lane; VPValue *Operand = I.value(); if (vputils::isUniformAfterVectorization(Operand)) - InputInstance.Lane = VPLane::getFirstLane(); - Cloned->setOperand(I.index(), State.get(Operand, InputInstance)); + InputLane = VPLane::getFirstLane(); + Cloned->setOperand(I.index(), State.get(Operand, InputLane)); } State.addNewMetadata(Cloned, Instr); // Place the cloned scalar in the new loop. State.Builder.Insert(Cloned); - State.set(RepRecipe, Cloned, Instance); + State.set(RepRecipe, Cloned, Lane); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) @@ -2748,8 +2741,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, - BasicBlock *VectorHeader, VPlan &Plan, + BasicBlock *MiddleBlock, VPlan &Plan, VPTransformState &State) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate @@ -2791,7 +2783,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); assert(StepVPV && "step must have been expanded during VPlan execution"); Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue() - : State.get(StepVPV, {0, 0}); + : State.get(StepVPV, VPLane(0)); Value *Escape = emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II.getKind(), II.getInductionBinOp()); @@ -2960,8 +2952,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (const auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), - IVEndValues[Entry.first], LoopMiddleBlock, - VectorLoop->getHeader(), Plan, State); + IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); } // Fix live-out phis not already fixed earlier. 
@@ -6595,7 +6586,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (auto *Cmp = dyn_cast(SI->getCondition())) Pred = Cmp->getPredicate(); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, Pred, - CostKind, I); + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -6614,7 +6606,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, cast(I)->getPredicate(), CostKind, - I); + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, I); } case Instruction::Store: case Instruction::Load: { @@ -7441,8 +7434,7 @@ static void createAndCollectMergePhiForReduction( auto *PhiR = cast(RedResult->getOperand(0)); const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - Value *FinalValue = - State.get(RedResult, VPIteration(0, VPLane::getFirstLane())); + Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane())); auto *ResumePhi = dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind( @@ -7531,7 +7523,7 @@ LoopVectorizationPlanner::executePlan( BestVPlan.getPreheader()->execute(&State); } if (!ILV.getTripCount()) - ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0})); + ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0))); else assert(IsEpilogueVectorization && "should only re-use the existing trip " "count during epilogue vectorization"); @@ -9302,41 +9294,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - // Adjust AnyOf reductions; replace the reduction phi for the selected value - // with a boolean reduction phi node to check if the condition is true in - // any iteration. The final value is selected by the final - // ComputeReductionResult. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { - return isa(U) || - (isa(U) && - cast(U)->getUnderlyingInstr()->getOpcode() == - Instruction::Select); - })); - VPValue *Cmp = Select->getOperand(0); - // If the compare is checking the reduction PHI node, adjust it to check - // the start value. - if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { - for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) - if (CmpR->getOperand(I) == PhiR) - CmpR->setOperand(I, PhiR->getStartValue()); - } - VPBuilder::InsertPointGuard Guard(Builder); - Builder.setInsertPoint(Select); - - // If the true value of the select is the reduction phi, the new value is - // selected if the negated condition is true in any iteration. - if (Select->getOperand(1) == PhiR) - Cmp = Builder.createNot(Cmp); - VPValue *Or = Builder.createOr(PhiR, Cmp); - Select->getVPSingleValue()->replaceAllUsesWith(Or); - - // Convert the reduction phi to operate on bools. - PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( - OrigLoop->getHeader()->getContext()))); - } - // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. 
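Several cost queries above now pass explicit operand descriptions: the extra getCmpSelInstrCost arguments let a target price compares and selects whose operands are known constants, uniforms, or powers of two. Passing {TTI::OK_AnyValue, TTI::OP_None} for both slots, as the vectorizer does here, preserves the old "no information" behavior. A sketch of a caller (the wrapper function is hypothetical):

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static InstructionCost cmpCostNoOpInfo(const TargetTransformInfo &TTI,
                                       Type *VecTy, Type *CondTy) {
  return TTI.getCmpSelInstrCost(
      Instruction::ICmp, VecTy, CondTy, CmpInst::ICMP_EQ,
      TTI::TCK_RecipThroughput,
      {TTI::OK_AnyValue, TTI::OP_None},  // LHS: nothing known
      {TTI::OK_AnyValue, TTI::OP_None}); // RHS: nothing known
}
```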
@@ -9409,54 +9366,89 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( return match(&User, m_Binary(m_VPValue(), m_VPValue())); }); + + // Adjust AnyOf reductions; replace the reduction phi for the selected value + // with a boolean reduction phi node to check if the condition is true in + // any iteration. The final value is selected by the final + // ComputeReductionResult. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { + return isa(U) || + (isa(U) && + cast(U)->getUnderlyingInstr()->getOpcode() == + Instruction::Select); + })); + VPValue *Cmp = Select->getOperand(0); + // If the compare is checking the reduction PHI node, adjust it to check + // the start value. + if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { + for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) + if (CmpR->getOperand(I) == PhiR) + CmpR->setOperand(I, PhiR->getStartValue()); + } + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint(Select); + + // If the true value of the select is the reduction phi, the new value is + // selected if the negated condition is true in any iteration. + if (Select->getOperand(1) == PhiR) + Cmp = Builder.createNot(Cmp); + VPValue *Or = Builder.createOr(PhiR, Cmp); + Select->getVPSingleValue()->replaceAllUsesWith(Or); + + // Convert the reduction phi to operate on bools. + PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( + OrigLoop->getHeader()->getContext()))); + } } VPlanTransforms::clearReductionWrapFlags(*Plan); } void VPDerivedIVRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "VPDerivedIVRecipe being replicated."); + assert(!State.Lane && "VPDerivedIVRecipe being replicated."); // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); if (FPBinOp) State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags()); - Value *Step = State.get(getStepValue(), VPIteration(0, 0)); - Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0)); + Value *Step = State.get(getStepValue(), VPLane(0)); + Value *CanonicalIV = State.get(getOperand(1), VPLane(0)); Value *DerivedIV = emitTransformedIndex( State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); DerivedIV->setName("offset.idx"); assert(DerivedIV != CanonicalIV && "IV didn't need transforming?"); - State.set(this, DerivedIV, VPIteration(0, 0)); + State.set(this, DerivedIV, VPLane(0)); } void VPReplicateRecipe::execute(VPTransformState &State) { Instruction *UI = getUnderlyingInstr(); - if (State.Instance) { // Generate a single instance. + if (State.Lane) { // Generate a single instance. assert((State.VF.isScalar() || !isUniform()) && "uniform recipe shouldn't be predicated"); assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); - State.ILV->scalarizeInstruction(UI, this, *State.Instance, State); + State.ILV->scalarizeInstruction(UI, this, *State.Lane, State); // Insert scalar instance packing it into a vector. if (State.VF.isVector() && shouldPack()) { // If we're constructing lane 0, initialize to start from poison. 
-      if (State.Instance->Lane.isFirstLane()) {
+      if (State.Lane->isFirstLane()) {
         assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
         Value *Poison = PoisonValue::get(
             VectorType::get(UI->getType(), State.VF));
         State.set(this, Poison);
       }
-      State.packScalarIntoVectorValue(this, *State.Instance);
+      State.packScalarIntoVectorValue(this, *State.Lane);
     }
     return;
   }
 
   if (IsUniform) {
     // Uniform within VL means we need to generate lane 0.
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
+    State.ILV->scalarizeInstruction(UI, this, VPLane(0), State);
     return;
   }
@@ -9465,15 +9457,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   if (isa<StoreInst>(UI) &&
       vputils::isUniformAfterVectorization(getOperand(1))) {
     auto Lane = VPLane::getLastLaneForVF(State.VF);
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
+    State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
     return;
   }
 
-  // Generate scalar instances for all VF lanes of all UF parts.
+  // Generate scalar instances for all VF lanes.
   assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
   const unsigned EndLane = State.VF.getKnownMinValue();
   for (unsigned Lane = 0; Lane < EndLane; ++Lane)
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
+    State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
 }
 
 // Determine how to lower the scalar epilogue, which depends on 1) optimising
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7e3dbe6260983..7c3741db40e75 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -260,6 +260,20 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
                               VF * getNumElements(ScalarTy));
 }
 
+/// Returns the number of elements of the given type \p Ty, not less than \p
+/// Sz, that forms a type which \p TTI splits into whole vector types during
+/// legalization.
+static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI,
+                                              Type *Ty, unsigned Sz) {
+  if (!isValidElementType(Ty))
+    return bit_ceil(Sz);
+  // Find the number of elements, which forms full vectors.
+  const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz));
+  if (NumParts == 0 || NumParts >= Sz)
+    return bit_ceil(Sz);
+  return bit_ceil(divideCeil(Sz, NumParts)) * NumParts;
+}
+
 static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
                                                    SmallVectorImpl<int> &Mask) {
   // The ShuffleBuilder implementation uses shufflevector to splat an "element".
@@ -394,7 +408,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
 /// total number of elements \p Size and number of registers (parts) \p
 /// NumParts.
 static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
-  return PowerOf2Ceil(divideCeil(Size, NumParts));
+  return std::min(Size, bit_ceil(divideCeil(Size, NumParts)));
 }
 
 /// Returns correct remaining number of elements, considering total amount \p
@@ -1222,6 +1236,22 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
          (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
 }
 
+/// Returns true if the widened type of \p Ty elements with size \p Sz
+/// represents a full vector type, i.e. adding an extra element results in
+/// extra parts upon type legalization.
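The rounding rule in getFullVectorNumberOfElements above is easiest to see with numbers; this standalone check mirrors the same arithmetic (C++20, no LLVM headers; 128-bit registers and i32 elements are assumed for the example):

```cpp
// With Sz = 6 i32 elements and NumParts = 2 legalized registers,
// bit_ceil(divideCeil(6, 2)) * 2 = 4 * 2 = 8: six scalars are padded to
// eight, i.e. exactly two whole <4 x i32> registers, rather than bit_ceil(6).
#include <bit>
#include <cassert>

int main() {
  unsigned Sz = 6, NumParts = 2;
  unsigned PerPart = std::bit_ceil((Sz + NumParts - 1) / NumParts);
  assert(PerPart * NumParts == 8);
  return 0;
}
```

This is also why getPartNumElems is now clamped by Size: a single part never needs more lanes than the whole request.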
+static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty, + unsigned Sz) { + if (Sz <= 1) + return false; + if (!isValidElementType(Ty) && !isa(Ty)) + return false; + if (has_single_bit(Sz)) + return true; + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) && + Sz % NumParts == 0; +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -1347,6 +1377,7 @@ class BoUpSLP { } MinBWs.clear(); ReductionBitWidth = 0; + BaseGraphSize = 1; CastMaxMinBWSizes.reset(); ExtraBitWidthNodes.clear(); InstrElementSize.clear(); @@ -1355,11 +1386,10 @@ class BoUpSLP { ValueToGatherNodes.clear(); } - unsigned getTreeSize() const { - return GatheredLoadsEntriesFirst == NoGatheredLoads - ? VectorizableTree.size() - : GatheredLoadsEntriesFirst; - } + unsigned getTreeSize() const { return VectorizableTree.size(); } + + /// Returns the base graph size, before any transformations. + unsigned getCanonicalGraphSize() const { return BaseGraphSize; } /// Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); @@ -1930,30 +1960,38 @@ class BoUpSLP { /// elements in the lane, it will be vectorized with higher probability /// after removing duplicates. Currently the SLP vectorizer supports only /// vectorization of the power-of-2 number of unique scalars. - int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const { + int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx, + const SmallBitVector &UsedLanes) const { Value *IdxLaneV = getData(Idx, Lane).V; - if (!isa(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V) + if (!isa(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V || + isa(IdxLaneV)) return 0; - SmallPtrSet Uniques; - for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) { + SmallDenseMap Uniques; + for (unsigned Ln : seq(getNumLanes())) { if (Ln == Lane) continue; Value *OpIdxLnV = getData(OpIdx, Ln).V; if (!isa(OpIdxLnV)) return 0; - Uniques.insert(OpIdxLnV); + Uniques.try_emplace(OpIdxLnV, Ln); } - int UniquesCount = Uniques.size(); - int UniquesCntWithIdxLaneV = - Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1; + unsigned UniquesCount = Uniques.size(); + auto IdxIt = Uniques.find(IdxLaneV); + unsigned UniquesCntWithIdxLaneV = + IdxIt != Uniques.end() ? UniquesCount : UniquesCount + 1; Value *OpIdxLaneV = getData(OpIdx, Lane).V; - int UniquesCntWithOpIdxLaneV = - Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1; + auto OpIdxIt = Uniques.find(OpIdxLaneV); + unsigned UniquesCntWithOpIdxLaneV = + OpIdxIt != Uniques.end() ? UniquesCount : UniquesCount + 1; if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV) return 0; - return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) - - UniquesCntWithOpIdxLaneV) - - (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV); + return std::min(bit_ceil(UniquesCntWithOpIdxLaneV) - + UniquesCntWithOpIdxLaneV, + UniquesCntWithOpIdxLaneV - + bit_floor(UniquesCntWithOpIdxLaneV)) - + ((IdxIt != Uniques.end() && UsedLanes.test(IdxIt->second)) + ? UniquesCntWithIdxLaneV - bit_floor(UniquesCntWithIdxLaneV) + : bit_ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV); } /// \param Lane lane of the operands under analysis. @@ -1993,7 +2031,7 @@ class BoUpSLP { /// predecessors. 
int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef MainAltOps, int Lane, unsigned OpIdx, unsigned Idx, - bool &IsUsed) { + bool &IsUsed, const SmallBitVector &UsedLanes) { LookAheadHeuristics LookAhead(TLI, DL, SE, R, getNumLanes(), LookAheadMaxDepth); // Keep track of the instruction stack as we recurse into the operands @@ -2002,11 +2040,10 @@ class BoUpSLP { LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr, /*CurrLevel=*/1, MainAltOps); if (Score) { - int SplatScore = getSplatScore(Lane, OpIdx, Idx); + int SplatScore = getSplatScore(Lane, OpIdx, Idx, UsedLanes); if (Score <= -SplatScore) { - // Set the minimum score for splat-like sequence to avoid setting - // failed state. - Score = 1; + // Failed score. + Score = 0; } else { Score += SplatScore; // Scale score to see the difference between different operands @@ -2036,7 +2073,8 @@ class BoUpSLP { std::optional getBestOperand(unsigned OpIdx, int Lane, int LastLane, ArrayRef ReorderingModes, - ArrayRef MainAltOps) { + ArrayRef MainAltOps, + const SmallBitVector &UsedLanes) { unsigned NumOperands = getNumOperands(); // The operand of the previous lane at OpIdx. @@ -2092,7 +2130,7 @@ class BoUpSLP { Value *OpLeft = (LeftToRight) ? OpLastLane : Op; Value *OpRight = (LeftToRight) ? Op : OpLastLane; int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane, - OpIdx, Idx, IsUsed); + OpIdx, Idx, IsUsed, UsedLanes); if (Score > static_cast(BestOp.Score) || (Score > 0 && Score == static_cast(BestOp.Score) && Idx == OpIdx)) { @@ -2507,20 +2545,24 @@ class BoUpSLP { for (unsigned I = 0; I < NumOperands; ++I) MainAltOps[I].push_back(getData(I, FirstLane).V); + SmallBitVector UsedLanes(NumLanes); + UsedLanes.set(FirstLane); for (unsigned Distance = 1; Distance != NumLanes; ++Distance) { // Visit the lane on the right and then the lane on the left. for (int Direction : {+1, -1}) { int Lane = FirstLane + Direction * Distance; if (Lane < 0 || Lane >= (int)NumLanes) continue; + UsedLanes.set(Lane); int LastLane = Lane - Direction; assert(LastLane >= 0 && LastLane < (int)NumLanes && "Out of bounds"); // Look for a good match for each operand. for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { // Search for the operand that matches SortedOps[OpIdx][Lane-1]. - std::optional BestIdx = getBestOperand( - OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]); + std::optional BestIdx = + getBestOperand(OpIdx, Lane, LastLane, ReorderingModes, + MainAltOps[OpIdx], UsedLanes); // By not selecting a value, we allow the operands that follow to // select a better matching value. We will get a non-null value in // the next run of getBestOperand(). @@ -3299,6 +3341,15 @@ class BoUpSLP { /// Return true if this is a non-power-of-2 node. bool isNonPowOf2Vec() const { bool IsNonPowerOf2 = !has_single_bit(Scalars.size()); + return IsNonPowerOf2; + } + + /// Return true if this is a node, which tries to vectorize number of + /// elements, forming whole vectors. + bool + hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const { + bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2( + TTI, getValueType(Scalars.front()), Scalars.size()); assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) && "Reshuffling not supported with non-power-of-2 vectors yet."); return IsNonPowerOf2; @@ -3418,8 +3469,10 @@ class BoUpSLP { Last->State = EntryState; // FIXME: Remove once support for ReuseShuffleIndices has been implemented // for non-power-of-two vectors. 
-    assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) &&
-           "Reshuffling scalars not yet supported for nodes with padding");
+    assert(
+        (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) ||
+         ReuseShuffleIndices.empty()) &&
+        "Reshuffling scalars not yet supported for nodes with padding");
     Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
                                      ReuseShuffleIndices.end());
     if (ReorderIndices.empty()) {
@@ -4179,6 +4232,9 @@ class BoUpSLP {
   /// reduction.
   unsigned ReductionBitWidth = 0;
 
+  /// Canonical graph size before the transformations.
+  unsigned BaseGraphSize = 1;
+
   /// If the tree contains any zext/sext/trunc nodes, contains max-min pair of
   /// type sizes, used in the tree.
   std::optional<std::pair<unsigned, unsigned>> CastMaxMinBWSizes;
@@ -5254,7 +5310,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
   // node.
   if (!TE.ReuseShuffleIndices.empty()) {
     // FIXME: Support ReuseShuffleIndices for non-power-of-two vectors.
-    assert(!TE.isNonPowOf2Vec() &&
+    assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI) &&
            "Reshuffling scalars not yet supported for nodes with padding");
 
     if (isSplat(TE.Scalars))
@@ -5494,7 +5550,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
   }
   // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars
   // has been audited for correctness with non-power-of-two vectors.
-  if (!TE.isNonPowOf2Vec())
+  if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI))
     if (std::optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
       return CurrentOrder;
 }
@@ -5647,8 +5703,8 @@ void BoUpSLP::reorderTopToBottom() {
   });
 
   // Reorder the graph nodes according to their vectorization factor.
-  for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
-       VF = bit_ceil(VF) / 2) {
+  for (unsigned VF = VectorizableTree.front()->getVectorFactor();
+       !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U)) {
     auto It = VFToOrderedEntries.find(VF);
     if (It == VFToOrderedEntries.end())
       continue;
@@ -5656,6 +5712,9 @@ void BoUpSLP::reorderTopToBottom() {
     // used order and reorder scalar elements in the nodes according to this
     // mostly used order.
     ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();
+    // Delete VF entry upon exit.
+    auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); });
+
     // All operands are reordered and used only in this node - propagate the
     // most used order to the user node.
     MapVector VL, unsigned Depth,
       UniqueValues.emplace_back(V);
     }
     size_t NumUniqueScalarValues = UniqueValues.size();
-    if (NumUniqueScalarValues == VL.size()) {
+    bool IsFullVectors = hasFullVectorsOrPowerOf2(
+        *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues);
+    if (NumUniqueScalarValues == VL.size() &&
+        (VectorizeNonPowerOf2 || IsFullVectors)) {
       ReuseShuffleIndices.clear();
     } else {
       // FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
- if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) || - !llvm::has_single_bit(VL.size())) { + if ((UserTreeIdx.UserTE && + UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) || + !has_single_bit(VL.size())) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return false; } LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (NumUniqueScalarValues <= 1 || - (UniquePositions.size() == 1 && all_of(UniqueValues, - [](Value *V) { - return isa(V) || - !isConstant(V); - })) || - !llvm::has_single_bit(NumUniqueScalarValues)) { + if (NumUniqueScalarValues <= 1 || !IsFullVectors || + (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { + return isa(V) || !isConstant(V); + }))) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && all_of(UniqueValues, [=](Value *V) { @@ -7540,7 +7600,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, areAllUsersVectorized(cast(V), UserIgnoreList); })) { - unsigned PWSz = PowerOf2Ceil(UniqueValues.size()); + // Find the number of elements, which forms full vectors. + unsigned PWSz = getFullVectorNumberOfElements( + *TTI, UniqueValues.front()->getType(), UniqueValues.size()); if (PWSz == VL.size()) { ReuseShuffleIndices.clear(); } else { @@ -8989,47 +9051,147 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef Ptrs, void BoUpSLP::transformNodes() { constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + BaseGraphSize = VectorizableTree.size(); + // Operands are profitable if they are: + // 1. At least one constant + // or + // 2. Splats + // or + // 3. Results in good vectorization opportunity, i.e. may generate vector + // nodes and reduce cost of the graph. + auto CheckOperandsProfitability = [this](Instruction *I1, Instruction *I2, + const InstructionsState &S) { + SmallVector>> Candidates; + for (unsigned Op : seq(S.MainOp->getNumOperands())) + Candidates.emplace_back().emplace_back(I1->getOperand(Op), + I2->getOperand(Op)); + return all_of( + Candidates, [this](ArrayRef> Cand) { + return all_of(Cand, + [](const std::pair &P) { + return isa(P.first) || + isa(P.second) || P.first == P.second; + }) || + findBestRootPair(Cand, LookAheadHeuristics::ScoreSplatLoads); + }); + }; // The tree may grow here, so iterate over nodes, built before. - for (unsigned Idx : seq(VectorizableTree.size())) { + for (unsigned Idx : seq(BaseGraphSize)) { TreeEntry &E = *VectorizableTree[Idx]; if (E.isGather()) { ArrayRef VL = E.Scalars; const unsigned Sz = getVectorElementSize(VL.front()); unsigned MinVF = getMinVF(2 * Sz); + // Do not try partial vectorization for small nodes (<= 2), nodes with the + // same opcode and same parent block or all constants. if (VL.size() <= 2 || - (E.getOpcode() && - (E.isAltShuffle() || E.getOpcode() != Instruction::Load))) + !(!E.getOpcode() || E.getOpcode() == Instruction::Load || + E.isAltShuffle() || !allSameBlock(VL)) || + allConstant(VL) || isSplat(VL)) continue; // Try to find vectorizable sequences and transform them into a series of // insertvector instructions. 
       unsigned StartIdx = 0;
       unsigned End = VL.size();
-      for (unsigned VF = VL.size() / 2; VF >= MinVF; VF /= 2) {
+      for (unsigned VF = VL.size() / 2; VF >= MinVF; VF = bit_ceil(VF) / 2) {
+        SmallVector<unsigned> Slices;
         for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) {
           ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
           // If any instruction is vectorized already - do not try again.
-          if (getTreeEntry(Slice.front()) || getTreeEntry(Slice.back()))
+          // Reuse the existing node, if it fully matches the slice.
+          if (const TreeEntry *SE = getTreeEntry(Slice.front());
+              SE || getTreeEntry(Slice.back())) {
+            if (!SE)
+              continue;
+            if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
+              continue;
+          }
+          // Constant already handled effectively - skip.
+          if (allConstant(Slice))
             continue;
-          InstructionsState S = getSameOpcode(Slice, *TLI);
-          if (!S.getOpcode() || S.isAltShuffle() ||
-              (S.getOpcode() != Instruction::Load &&
-               any_of(Slice, [&](Value *V) {
-                 return !areAllUsersVectorized(cast<Instruction>(V),
-                                               UserIgnoreList);
-               })))
+          // Do not try to vectorize small splats (less than vector register
+          // and only with the single non-undef element).
+          bool IsSplat = isSplat(Slice);
+          if (Slices.empty() || !IsSplat ||
+              (VF <= 2 && 2 * std::clamp(TTI->getNumberOfParts(getWidenedType(
+                                             Slice.front()->getType(), VF)),
+                                         1U, VF - 1) !=
+                              std::clamp(TTI->getNumberOfParts(getWidenedType(
+                                             Slice.front()->getType(), 2 * VF)),
+                                         1U, 2 * VF)) ||
+              count(Slice, Slice.front()) ==
+                  (isa<UndefValue>(Slice.front()) ? VF - 1 : 1)) {
+            if (IsSplat)
+              continue;
+            InstructionsState S = getSameOpcode(Slice, *TLI);
+            if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice))
+              continue;
+            if (VF == 2) {
+              // Try to vectorize reduced values or if all users are vectorized.
+              // For expensive instructions extra extracts might be profitable.
+              if ((!UserIgnoreList || E.Idx != 0) &&
+                  TTI->getInstructionCost(cast<Instruction>(Slice.front()),
+                                          CostKind) < TTI::TCC_Expensive &&
+                  !all_of(Slice, [&](Value *V) {
+                    return areAllUsersVectorized(cast<Instruction>(V),
+                                                 UserIgnoreList);
+                  }))
+                continue;
+              if (S.getOpcode() == Instruction::Load) {
+                OrdersType Order;
+                SmallVector<Value *> PointerOps;
+                LoadsState Res =
+                    canVectorizeLoads(Slice, Slice.front(), Order, PointerOps);
+                // Do not vectorize gathers.
+                if (Res == LoadsState::ScatterVectorize ||
+                    Res == LoadsState::Gather)
+                  continue;
+              } else if (S.getOpcode() == Instruction::ExtractElement ||
+                         (TTI->getInstructionCost(
+                              cast<Instruction>(Slice.front()), CostKind) <
+                              TTI::TCC_Expensive &&
+                          !CheckOperandsProfitability(
+                              cast<Instruction>(Slice.front()),
+                              cast<Instruction>(Slice.back()), S))) {
+                // Do not vectorize extractelements (handled effectively
+                // already). Do not vectorize non-profitable instructions
+                // (with low cost and non-vectorizable operands).
+                continue;
+              }
+            }
+          }
+          Slices.emplace_back(Cnt);
+        }
+        auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt) {
+          E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt);
+          if (StartIdx == Cnt)
+            StartIdx = Cnt + VF;
+          if (End == Cnt + VF)
+            End = Cnt;
+        };
+        for (unsigned Cnt : Slices) {
+          ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+          // If any instruction is vectorized already - do not try again.
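The slicing loop's shape is easier to follow outside the pass; this standalone sketch reproduces only the iteration order (the real code above additionally re-bases StartIdx/End as AddCombinedNode claims slices, and applies the profitability filters):

```cpp
#include <cstdio>

// For a 7-element gather with MinVF = 2, candidate VFs are 3, then 2; within
// each VF, slice starts advance by VF while a full slice still fits.
int main() {
  unsigned Size = 7, MinVF = 2;
  auto BitCeil = [](unsigned V) {
    unsigned R = 1;
    while (R < V)
      R <<= 1;
    return R;
  };
  for (unsigned VF = Size / 2; VF >= MinVF; VF = BitCeil(VF) / 2)
    for (unsigned Cnt = 0; Cnt + VF <= Size; Cnt += VF)
      std::printf("VF=%u -> slice [%u, %u)\n", VF, Cnt, Cnt + VF);
  return 0;
}
```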
+ if (const TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; + AddCombinedNode(SE->Idx, Cnt); continue; + } unsigned PrevSize = VectorizableTree.size(); buildTree_rec(Slice, 0, EdgeInfo(&E, UINT_MAX)); if (PrevSize + 1 == VectorizableTree.size() && - VectorizableTree[PrevSize]->isGather()) { + VectorizableTree[PrevSize]->isGather() && + VectorizableTree[PrevSize]->getOpcode() != + Instruction::ExtractElement && + !isSplat(Slice)) { VectorizableTree.pop_back(); continue; } - E.CombinedEntriesWithIndices.emplace_back(PrevSize, Cnt); - if (StartIdx == Cnt) - StartIdx = Cnt + VF; - if (End == Cnt + VF) - End = Cnt; + AddCombinedNode(PrevSize, Cnt); } } } @@ -9678,9 +9840,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { return nullptr; Value *VecBase = nullptr; ArrayRef VL = E->Scalars; - // If the resulting type is scalarized, do not adjust the cost. - if (NumParts == VL.size()) - return nullptr; // Check if it can be considered reused if same extractelements were // vectorized already. bool PrevNodeFound = any_of( @@ -9986,8 +10145,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } Cost += ::getShuffleCost( TTI, TTI::SK_InsertSubvector, - FixedVectorType::get(ScalarTy, CommonMask.size()), {}, CostKind, - Idx, FixedVectorType::get(ScalarTy, E->getVectorFactor())); + getWidenedType(ScalarTy, CommonMask.size()), {}, CostKind, Idx, + getWidenedType(ScalarTy, E->getVectorFactor())); if (!CommonMask.empty()) { std::iota(std::next(CommonMask.begin(), Idx), std::next(CommonMask.begin(), Idx + E->getVectorFactor()), @@ -10243,9 +10402,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (VI && SelectOnly) { assert(!Ty->isVectorTy() && "Expected only for scalar type."); auto *CI = cast(VI->getOperand(0)); - IntrinsicCost -= - TTI->getCmpSelInstrCost(CI->getOpcode(), Ty, Builder.getInt1Ty(), - CI->getPredicate(), CostKind, CI); + IntrinsicCost -= TTI->getCmpSelInstrCost( + CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(), + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, CI); } return IntrinsicCost; }; @@ -10334,7 +10494,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InsertMask[Idx] = I + 1; } unsigned VecScalarsSz = PowerOf2Ceil(NumElts); - if (NumOfParts > 0) + if (NumOfParts > 0 && NumOfParts < NumElts) VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts); unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz; @@ -10509,7 +10669,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost ScalarCost = TTI->getCmpSelInstrCost( E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred, - CostKind, VI); + CostKind, getOperandInfo(VI->getOperand(0)), + getOperandInfo(VI->getOperand(1)), VI); InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI); if (IntrinsicCost.isValid()) ScalarCost = IntrinsicCost; @@ -10519,8 +10680,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto GetVectorCost = [&](InstructionCost CommonCost) { auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size()); - InstructionCost VecCost = TTI->getCmpSelInstrCost( - E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0); + InstructionCost VecCost = + TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred, + CostKind, getOperandInfo(E->getOperand(0)), 
+                                    getOperandInfo(E->getOperand(1)), VL0);
         if (auto *SI = dyn_cast<SelectInst>(VL0)) {
           auto *CondType =
               getWidenedType(SI->getCondition()->getType(), VL.size());
@@ -10760,11 +10923,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
     } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
       auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
-      VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
-                                          CI0->getPredicate(), CostKind, VL0);
+      VecCost = TTIRef.getCmpSelInstrCost(
+          E->getOpcode(), VecTy, MaskTy, CI0->getPredicate(), CostKind,
+          {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
+          VL0);
       VecCost += TTIRef.getCmpSelInstrCost(
           E->getOpcode(), VecTy, MaskTy,
           cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
+          {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
           E->getAltOp());
     } else {
       Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType();
@@ -12281,6 +12447,14 @@ BoUpSLP::isGatherShuffledEntry(
          "Expected only single user of the gather node.");
   assert(VL.size() % NumParts == 0 &&
          "Number of scalars must be divisible by NumParts.");
+  if (!TE->UserTreeIndices.empty() &&
+      TE->UserTreeIndices.front().UserTE->isGather() &&
+      TE->UserTreeIndices.front().EdgeIdx == UINT_MAX) {
+    assert((TE->Idx == 0 || TE->getOpcode() == Instruction::ExtractElement ||
+            isSplat(TE->Scalars)) &&
+           "Expected splat or extractelements only node.");
+    return {};
+  }
   unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
   SmallVector<std::optional<TTI::ShuffleKind>> Res;
   for (unsigned Part : seq<unsigned>(NumParts)) {
@@ -17107,7 +17281,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
     if (R.isGathered(Chain.front()) ||
         R.isNotScheduled(cast<StoreInst>(Chain.front())->getValueOperand()))
       return std::nullopt;
-    Size = R.getTreeSize();
+    Size = R.getCanonicalGraphSize();
     return false;
   }
   R.reorderTopToBottom();
@@ -17117,7 +17291,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
   R.computeMinimumValueSizes();
-  Size = R.getTreeSize();
+  Size = R.getCanonicalGraphSize();
   if (S.getOpcode() == Instruction::Load)
     Size = 2; // cut off masked gather small trees
   InstructionCost Cost = R.getTreeCost();
@@ -17655,7 +17829,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   for (unsigned I = NextInst; I < MaxInst; ++I) {
     unsigned ActualVF = std::min(MaxInst - I, VF);
-    if (!has_single_bit(ActualVF))
+    if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF))
       continue;
     if (MaxVFOnly && ActualVF < MaxVF)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ce15b2783cc45..6ddbfcf0ecfe5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -228,28 +228,27 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
     : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
       LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
-Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
+Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
   if (Def->isLiveIn())
     return Def->getLiveInIRValue();
-  if (hasScalarValue(Def, Instance)) {
-    return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)];
-  }
-  if (!Instance.Lane.isFirstLane() &&
-      vputils::isUniformAfterVectorization(Def) &&
-      hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
+  if (hasScalarValue(Def, Lane))
+    return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
+
+ if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) && + hasScalarValue(Def, VPLane::getFirstLane())) { return Data.VPV2Scalars[Def][0]; } assert(hasVectorValue(Def)); auto *VecPart = Data.VPV2Vector[Def]; if (!VecPart->getType()->isVectorTy()) { - assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar"); + assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar"); return VecPart; } // TODO: Cache created scalar values. - Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF); - auto *Extract = Builder.CreateExtractElement(VecPart, Lane); + Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF); + auto *Extract = Builder.CreateExtractElement(VecPart, LaneV); // set(Def, Extract, Instance); return Extract; } @@ -258,11 +257,11 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { if (NeedsScalar) { assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) || !vputils::onlyFirstLaneUsed(Def) || - (hasScalarValue(Def, VPIteration(0, 0)) && + (hasScalarValue(Def, VPLane(0)) && Data.VPV2Scalars[Def].size() == 1)) && "Trying to access a single scalar per part but has multiple scalars " "per part."); - return get(Def, VPIteration(0, 0)); + return get(Def, VPLane(0)); } // If Values have been set for this Def return the one relevant for \p Part. @@ -289,7 +288,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { return Shuf; }; - if (!hasScalarValue(Def, {0, 0})) { + if (!hasScalarValue(Def, {0})) { assert(Def->isLiveIn() && "expected a live-in"); Value *IRV = Def->getLiveInIRValue(); Value *B = GetBroadcastInstrs(IRV); @@ -297,7 +296,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { return B; } - Value *ScalarValue = get(Def, {0, 0}); + Value *ScalarValue = get(Def, VPLane(0)); // If we aren't vectorizing, we can just copy the scalar map values over // to the vector map. if (VF.isScalar()) { @@ -307,9 +306,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { bool IsUniform = vputils::isUniformAfterVectorization(Def); - unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1; + VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1); // Check if there is a scalar value for the selected lane. - if (!hasScalarValue(Def, {0, LastLane})) { + if (!hasScalarValue(Def, LastLane)) { // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and // VPExpandSCEVRecipes can also be uniform. assert((isa(Def->getDefiningRecipe()) || @@ -320,7 +319,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastInst = cast(get(Def, {0, LastLane})); + auto *LastInst = cast(get(Def, LastLane)); // Set the insert point after the last scalarized instruction or after the // last PHI, if LastInst is a PHI. This ensures the insertelement sequence // will directly follow the scalar definitions. @@ -333,10 +332,10 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { // However, if we are vectorizing, we need to construct the vector values. // If the value is known to be uniform after vectorization, we can just - // broadcast the scalar value corresponding to lane zero for each unroll - // iteration. Otherwise, we construct the vector values using - // insertelement instructions. Since the resulting vectors are stored in - // State, we will only generate the insertelements once. + // broadcast the scalar value corresponding to lane zero. Otherwise, we + // construct the vector values using insertelement instructions. 
Since the + // resulting vectors are stored in State, we will only generate the + // insertelements once. Value *VectorValue = nullptr; if (IsUniform) { VectorValue = GetBroadcastInstrs(ScalarValue); @@ -347,7 +346,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) - packScalarIntoVectorValue(Def, {0, Lane}); + packScalarIntoVectorValue(Def, Lane); VectorValue = get(Def); } Builder.restoreIP(OldIP); @@ -401,11 +400,11 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) { } void VPTransformState::packScalarIntoVectorValue(VPValue *Def, - const VPIteration &Instance) { - Value *ScalarInst = get(Def, Instance); + const VPLane &Lane) { + Value *ScalarInst = get(Def, Lane); Value *VectorValue = get(Def); - VectorValue = Builder.CreateInsertElement( - VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF)); + VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst, + Lane.getAsRuntimeExpr(Builder, VF)); set(Def, VectorValue); } @@ -483,7 +482,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) { } void VPBasicBlock::execute(VPTransformState *State) { - bool Replica = State->Instance && !State->Instance->isFirstIteration(); + bool Replica = bool(State->Lane); VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; VPBlockBase *SingleHPred = nullptr; BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. @@ -765,23 +764,23 @@ void VPRegionBlock::execute(VPTransformState *State) { return; } - assert(!State->Instance && "Replicating a Region with non-null instance."); + assert(!State->Lane && "Replicating a Region with non-null instance."); // Enter replicating mode. - State->Instance = VPIteration(0, 0); - assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); - for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; - ++Lane) { - State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); - // Visit the VPBlocks connected to \p this, starting from it. - for (VPBlockBase *Block : RPOT) { - LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); - Block->execute(State); - } + assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); + State->Lane = VPLane(0); + for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; + ++Lane) { + State->Lane = VPLane(Lane, VPLane::Kind::First); + // Visit the VPBlocks connected to \p this, starting from it. + for (VPBlockBase *Block : RPOT) { + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); + Block->execute(State); + } } // Exit replicating mode. - State->Instance.reset(); + State->Lane.reset(); } InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0632495bc511c..bbcfaf9e19cd0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -172,6 +172,7 @@ class VPLane { Kind LaneKind; public: + VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {} VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {} static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); } @@ -230,23 +231,6 @@ class VPLane { } }; -/// VPIteration represents a single point in the iteration space of the output -/// (vectorized and/or unrolled) IR loop. 
-struct VPIteration {
-  /// in [0..UF)
-  unsigned Part;
-  VPLane Lane;
-  VPIteration(unsigned Part, unsigned Lane,
-              VPLane::Kind Kind = VPLane::Kind::First)
-      : Part(Part), Lane(Lane, Kind) {}
-  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
-  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
-};
-
 /// VPTransformState holds information passed down when "executing" a VPlan,
 /// needed for generating the output IR.
 struct VPTransformState {
@@ -254,13 +238,13 @@ struct VPTransformState {
                    DominatorTree *DT, IRBuilderBase &Builder,
                    InnerLoopVectorizer *ILV, VPlan *Plan);
-  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
+  /// The chosen Vectorization Factor of the loop being vectorized.
   ElementCount VF;
-  /// Hold the indices to generate specific scalar instructions. Null indicates
+  /// Hold the index to generate specific scalar instructions. Null indicates
   /// that all instances are to be generated, using either scalar or vector
   /// instructions.
-  std::optional<VPIteration> Instance;
+  std::optional<VPLane> Lane;
   struct DataState {
     // Each value from the original loop, when vectorized, is represented by a
@@ -275,15 +259,15 @@ struct VPTransformState {
   Value *get(VPValue *Def, bool IsScalar = false);
   /// Get the generated Value for a given VPValue and given Part and Lane.
-  Value *get(VPValue *Def, const VPIteration &Instance);
+  Value *get(VPValue *Def, const VPLane &Lane);
   bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
-  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
+  bool hasScalarValue(VPValue *Def, VPLane Lane) {
     auto I = Data.VPV2Scalars.find(Def);
     if (I == Data.VPV2Scalars.end())
       return false;
-    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
     return CacheIdx < I->second.size() && I->second[CacheIdx];
   }
@@ -291,7 +275,7 @@ struct VPTransformState {
   /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
   void set(VPValue *Def, Value *V, bool IsScalar = false) {
     if (IsScalar) {
-      set(Def, V, VPIteration(0, 0));
+      set(Def, V, VPLane(0));
       return;
     }
     assert((VF.isScalar() || V->getType()->isVectorTy()) &&
@@ -305,23 +289,23 @@ struct VPTransformState {
     Data.VPV2Vector[Def] = V;
   }
-  /// Set the generated scalar \p V for \p Def and the given \p Instance.
-  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
+  /// Set the generated scalar \p V for \p Def and the given \p Lane.
+  void set(VPValue *Def, Value *V, const VPLane &Lane) {
     auto Iter = Data.VPV2Scalars.insert({Def, {}});
     auto &Scalars = Iter.first->second;
-    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
     if (Scalars.size() <= CacheIdx)
       Scalars.resize(CacheIdx + 1);
     assert(!Scalars[CacheIdx] && "should overwrite existing value");
     Scalars[CacheIdx] = V;
   }
-  /// Reset an existing scalar value for \p Def and a given \p Instance.
-  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
+  /// Reset an existing scalar value for \p Def and a given \p Lane.
+ void reset(VPValue *Def, Value *V, const VPLane &Lane) { auto Iter = Data.VPV2Scalars.find(Def); assert(Iter != Data.VPV2Scalars.end() && "need to overwrite existing value"); - unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); + unsigned CacheIdx = Lane.mapToCacheIndex(VF); assert(CacheIdx < Iter->second.size() && "need to overwrite existing value"); Iter->second[CacheIdx] = V; @@ -345,7 +329,7 @@ struct VPTransformState { void setDebugLocFrom(DebugLoc DL); /// Construct the vector value of a scalarized value \p V one lane at a time. - void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance); + void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane); /// Hold state information used when constructing the CFG of the output IR, /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. @@ -1253,9 +1237,7 @@ class VPInstruction : public VPRecipeWithIRFlags, ComputeReductionResult, // Takes the VPValue to extract from as first operand and the lane or part // to extract as second operand, counting from the end starting with 1 for - // last. The second operand must be a positive constant and <= VF when - // extracting from a vector or <= UF when extracting from an unrolled - // scalar. + // last. The second operand must be a positive constant and <= VF. ExtractFromEnd, LogicalAnd, // Non-poison propagating logical And. // Add an offset in bytes (second operand) to a base pointer (first @@ -1291,7 +1273,7 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Utility methods serving execute(): generates a scalar single instance of /// the modeled instruction for a given lane. \returns the scalar generated /// value for lane \p Lane. - Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); + Value *generatePerLane(VPTransformState &State, const VPLane &Lane); #if !defined(NDEBUG) /// Return true if the VPInstruction is a floating point math operation, i.e. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 318d6a8c5b8c3..dacba152611c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -209,7 +209,7 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { ? MiddleVPBB : ExitingVPBB; BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; - Value *V = State.get(ExitValue, VPIteration(0, 0)); + Value *V = State.get(ExitValue, VPLane(0)); if (Phi->getBasicBlockIndex(PredBB) != -1) Phi->setIncomingValueForBlock(PredBB, V); else @@ -390,7 +390,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { } Value *VPInstruction::generatePerLane(VPTransformState &State, - const VPIteration &Lane) { + const VPLane &Lane) { IRBuilderBase &Builder = State.Builder; assert(getOpcode() == VPInstruction::PtrAdd && @@ -432,9 +432,9 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::ActiveLaneMask: { // Get first lane of vector induction variable. - Value *VIVElem0 = State.get(getOperand(0), VPIteration(0, 0)); + Value *VIVElem0 = State.get(getOperand(0), VPLane(0)); // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), VPIteration(0, 0)); + Value *ScalarTC = State.get(getOperand(1), VPLane(0)); // If this part of the active lane mask is scalar, generate the CMP directly // to avoid unnecessary extracts. 
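Note for readers following the VPIteration to VPLane migration above: with unrolling now modeled by distinct per-part VPValues, the scalar cache no longer needs a (Part, Lane) pair and is keyed by lane alone. Below is a minimal, self-contained sketch of that lane-keyed cache; it is illustrative only, using simplified stand-ins (std::map, an opaque Value) for the in-tree DenseMap-based DataState, and the names are not the real API.

#include <cassert>
#include <map>
#include <vector>

struct Value; // opaque stand-in for llvm::Value

struct LaneSketch {
  unsigned Idx; // in [0, VF)
  unsigned mapToCacheIndex(unsigned VF) const {
    assert(Idx < VF && "lane out of bounds");
    return Idx; // Kind::First lanes map directly; lanes counted from the
                // end of a scalable vector would map differently.
  }
};

class ScalarCacheSketch {
  // One vector of generated scalars per definition, indexed by lane.
  std::map<const void *, std::vector<Value *>> VPV2Scalars;

public:
  bool hasScalarValue(const void *Def, LaneSketch L, unsigned VF) const {
    auto It = VPV2Scalars.find(Def);
    if (It == VPV2Scalars.end())
      return false;
    unsigned CacheIdx = L.mapToCacheIndex(VF);
    return CacheIdx < It->second.size() && It->second[CacheIdx] != nullptr;
  }

  void set(const void *Def, Value *V, LaneSketch L, unsigned VF) {
    auto &Scalars = VPV2Scalars[Def];
    unsigned CacheIdx = L.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should not overwrite existing value");
    Scalars[CacheIdx] = V;
  }
};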
@@ -469,7 +469,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::CalculateTripCountMinusVF: { unsigned UF = getParent()->getPlan()->getUF(); - Value *ScalarTC = State.get(getOperand(0), {0, 0}); + Value *ScalarTC = State.get(getOperand(0), VPLane(0)); Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF); Value *Sub = Builder.CreateSub(ScalarTC, Step); Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); @@ -477,32 +477,24 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateSelect(Cmp, Sub, Zero); } case VPInstruction::ExplicitVectorLength: { - // Compute EVL - auto GetEVL = [=](VPTransformState &State, Value *AVL) { - assert(AVL->getType()->isIntegerTy() && - "Requested vector length should be an integer."); - - // TODO: Add support for MaxSafeDist for correct loop emission. - assert(State.VF.isScalable() && "Expected scalable vector factor."); - Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); - - Value *EVL = State.Builder.CreateIntrinsic( - State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, - {AVL, VFArg, State.Builder.getTrue()}); - return EVL; - }; // TODO: Restructure this code with an explicit remainder loop, vsetvli can // be outside of the main loop. - // Compute VTC - IV as the AVL (requested vector length). - Value *Index = State.get(getOperand(0), VPIteration(0, 0)); - Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); - Value *AVL = State.Builder.CreateSub(TripCount, Index); - Value *EVL = GetEVL(State, AVL); + Value *AVL = State.get(getOperand(0), /*IsScalar*/ true); + // Compute EVL + assert(AVL->getType()->isIntegerTy() && + "Requested vector length should be an integer."); + + assert(State.VF.isScalable() && "Expected scalable vector factor."); + Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); + + Value *EVL = State.Builder.CreateIntrinsic( + State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, + {AVL, VFArg, State.Builder.getTrue()}); return EVL; } case VPInstruction::CanonicalIVIncrementForPart: { unsigned Part = getUnrollPart(*this); - auto *IV = State.get(getOperand(0), VPIteration(0, 0)); + auto *IV = State.get(getOperand(0), VPLane(0)); assert(Part != 0 && "Must have a positive part"); // The canonical IV is incremented by the vectorization factor (num of // SIMD elements) times the unroll part. @@ -511,7 +503,7 @@ Value *VPInstruction::generate(VPTransformState &State) { hasNoSignedWrap()); } case VPInstruction::BranchOnCond: { - Value *Cond = State.get(getOperand(0), VPIteration(0, 0)); + Value *Cond = State.get(getOperand(0), VPLane(0)); // Replace the temporary unreachable terminator with a new conditional // branch, hooking it up to backward destination for exiting blocks now and // to forward destination(s) later when they are created. @@ -633,8 +625,7 @@ Value *VPInstruction::generate(VPTransformState &State) { assert(Offset <= State.VF.getKnownMinValue() && "invalid offset to extract from"); // Extract lane VF - Offset from the operand. 
-    Res = State.get(getOperand(0),
-                    VPIteration(0, VPLane::getLaneFromEnd(State.VF, Offset)));
+    Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
   } else {
     assert(Offset <= 1 && "invalid offset to extract from");
     Res = State.get(getOperand(0));
   }
@@ -700,7 +691,7 @@ bool VPInstruction::isFPMathOp() const {
 #endif
 void VPInstruction::execute(VPTransformState &State) {
-  assert(!State.Instance && "VPInstruction executing an Instance");
+  assert(!State.Lane && "VPInstruction executing a Lane");
   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
   assert((hasFastMathFlags() == isFPMathOp() ||
           getOpcode() == Instruction::Select) &&
@@ -715,9 +706,9 @@ void VPInstruction::execute(VPTransformState &State) {
   if (GeneratesPerAllLanes) {
     for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
          Lane != NumLanes; ++Lane) {
-      Value *GeneratedValue = generatePerLane(State, VPIteration(0, Lane));
+      Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
       assert(GeneratedValue && "generatePerLane must produce a value");
-      State.set(this, GeneratedValue, VPIteration(0, Lane));
+      State.set(this, GeneratedValue, VPLane(Lane));
     }
     return;
   }
@@ -865,7 +856,7 @@ void VPIRInstruction::execute(VPTransformState &State) {
     // Set insertion point in PredBB in case an extract needs to be generated.
     // TODO: Model extracts explicitly.
     State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
-    Value *V = State.get(ExitValue, VPIteration(0, Lane));
+    Value *V = State.get(ExitValue, VPLane(Lane));
     auto *Phi = cast<PHINode>(&I);
     Phi->addIncoming(V, PredBB);
   }
@@ -913,12 +904,12 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
     Value *Arg;
     if (UseIntrinsic &&
         isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
-      Arg = State.get(I.value(), VPIteration(0, 0));
+      Arg = State.get(I.value(), VPLane(0));
     // Some vectorized function variants may also take a scalar argument,
     // e.g. linear parameters for pointers. This needs to be the scalar value
     // from the start of the respective part when interleaving.
     else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
-      Arg = State.get(I.value(), VPIteration(0, 0));
+      Arg = State.get(I.value(), VPLane(0));
     else
       Arg = State.get(I.value());
     if (UseIntrinsic &&
@@ -1053,7 +1044,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
   // We have to take the 'vectorized' value and pick the first lane.
   // Instcombine will make this a no-op.
   auto *InvarCond =
-      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
+      isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
   Value *Cond = InvarCond ? InvarCond : State.get(getCond());
   Value *Op0 = State.get(getOperand(1));
@@ -1267,7 +1258,9 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
     Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
     Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
     return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
-                                      CostKind, CtxI);
+                                      CostKind,
+                                      {TTI::OK_AnyValue, TTI::OP_None},
+                                      {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
   }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
@@ -1416,7 +1409,7 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
 }
 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
-  assert(!State.Instance && "Int or FP induction being replicated.");
+  assert(!State.Lane && "Int or FP induction being replicated.");
   Value *Start = getStartValue()->getLiveInIRValue();
   const InductionDescriptor &ID = getInductionDescriptor();
@@ -1435,7 +1428,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
     Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
   // Now do the actual transformations, and start with fetching the step value.
-  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+  Value *Step = State.get(getStepValue(), VPLane(0));
   assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
          "Expected either an induction phi-node or a truncate of it!");
@@ -1478,7 +1471,7 @@
   // Multiply the vectorization factor by the step using integer or
   // floating-point arithmetic as appropriate.
   Type *StepType = Step->getType();
-  Value *RuntimeVF = State.get(getVFValue(), {0, 0});
+  Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
   if (Step->getType()->isFloatingPointTy())
     RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
   else
@@ -1575,8 +1568,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
   /// Compute scalar induction steps. \p ScalarIV is the scalar induction
   /// variable on which to base the steps, \p Step is the size of the step.
-  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
-  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+  Value *BaseIV = State.get(getOperand(0), VPLane(0));
+  Value *Step = State.get(getStepValue(), VPLane(0));
   IRBuilderBase &Builder = State.Builder;
   // Ensure step has the same type as that of scalar IV.
@@ -1613,8 +1606,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
   unsigned StartLane = 0;
   unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
-  if (State.Instance) {
-    StartLane = State.Instance->Lane.getKnownLane();
+  if (State.Lane) {
+    StartLane = State.Lane->getKnownLane();
     EndLane = StartLane + 1;
   }
   Value *StartIdx0 =
@@ -1646,7 +1639,7 @@
            "scalable");
     auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
     auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
-    State.set(this, Add, VPIteration(0, Lane));
+    State.set(this, Add, VPLane(Lane));
   }
 }
@@ -1684,7 +1677,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
     // the lane-zero scalar value.
SmallVector Ops; for (unsigned I = 0, E = getNumOperands(); I != E; I++) - Ops.push_back(State.get(getOperand(I), VPIteration(0, 0))); + Ops.push_back(State.get(getOperand(I), VPLane(0))); auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], @@ -1697,9 +1690,8 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // produce a vector of pointers unless VF is scalar. // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. - auto *Ptr = isPointerLoopInvariant() - ? State.get(getOperand(0), VPIteration(0, 0)) - : State.get(getOperand(0)); + auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0)) + : State.get(getOperand(0)); // Collect all the indices for the new GEP. If any index is // loop-invariant, we won't broadcast it. @@ -1707,7 +1699,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { for (unsigned I = 1, E = getNumOperands(); I < E; I++) { VPValue *Operand = getOperand(I); if (isIndexLoopInvariant(I - 1)) - Indices.push_back(State.get(Operand, VPIteration(0, 0))); + Indices.push_back(State.get(Operand, VPLane(0))); else Indices.push_back(State.get(Operand)); } @@ -1749,7 +1741,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0) ? DL.getIndexType(IndexedTy->getPointerTo()) : Builder.getInt32Ty(); - Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); + Value *Ptr = State.get(getOperand(0), VPLane(0)); bool InBounds = isInBounds(); Value *ResultPtr = nullptr; @@ -1850,7 +1842,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPReductionRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Reduction being replicated."); + assert(!State.Lane && "Reduction being replicated."); Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true); RecurKind Kind = RdxDesc.getRecurrenceKind(); // Propagate the fast-math flags carried by the underlying instruction. @@ -1900,7 +1892,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { } void VPReductionEVLRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Reduction being replicated."); + assert(!State.Lane && "Reduction being replicated."); auto &Builder = State.Builder; // Propagate the fast-math flags carried by the underlying instruction. @@ -1911,7 +1903,7 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) { RecurKind Kind = RdxDesc.getRecurrenceKind(); Value *Prev = State.get(getChainOp(), /*IsScalar*/ true); Value *VecOp = State.get(getVecOp()); - Value *EVL = State.get(getEVL(), VPIteration(0, 0)); + Value *EVL = State.get(getEVL(), VPLane(0)); VectorBuilder VBuilder(Builder); VBuilder.setEVL(EVL); @@ -2033,7 +2025,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) { case Instruction::ZExt: case Instruction::Trunc: { // Note: SExt/ZExt not used yet. 
- Value *Op = State.get(getOperand(0), VPIteration(0, 0)); + Value *Op = State.get(getOperand(0), VPLane(0)); return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); } default: @@ -2042,7 +2034,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) { } void VPScalarCastRecipe ::execute(VPTransformState &State) { - State.set(this, generate(State), VPIteration(0, 0)); + State.set(this, generate(State), VPLane(0)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2057,9 +2049,9 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, #endif void VPBranchOnMaskRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Branch on Mask works only on single instance."); + assert(State.Lane && "Branch on Mask works only on single instance."); - unsigned Lane = State.Instance->Lane.getKnownLane(); + unsigned Lane = State.Lane->getKnownLane(); Value *ConditionBit = nullptr; VPValue *BlockInMask = getMask(); @@ -2082,9 +2074,9 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) { } void VPPredInstPHIRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Predicated instruction PHI works per instance."); + assert(State.Lane && "Predicated instruction PHI works per instance."); Instruction *ScalarPredInst = - cast(State.get(getOperand(0), *State.Instance)); + cast(State.get(getOperand(0), *State.Lane)); BasicBlock *PredicatedBB = ScalarPredInst->getParent(); BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); @@ -2116,13 +2108,13 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), PredicatingBB); Phi->addIncoming(ScalarPredInst, PredicatedBB); - if (State.hasScalarValue(this, *State.Instance)) - State.reset(this, Phi, *State.Instance); + if (State.hasScalarValue(this, *State.Lane)) + State.reset(this, Phi, *State.Lane); else - State.set(this, Phi, *State.Instance); + State.set(this, Phi, *State.Lane); // NOTE: Currently we need to update the value of the operand, so the next // predicated iteration inserts its generated value in the correct vector. 
-    State.reset(getOperand(0), Phi, *State.Instance);
+    State.reset(getOperand(0), Phi, *State.Lane);
   }
 }
@@ -2245,7 +2237,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
   CallInst *NewLI;
-  Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+  Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
   if (VPValue *VPMask = getMask()) {
@@ -2343,7 +2335,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
-  Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+  Value *EVL = State.get(getEVL(), VPLane(0));
   if (isReverse())
     StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
@@ -2469,7 +2461,7 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
 //      <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
 //   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
 void VPInterleaveRecipe::execute(VPTransformState &State) {
-  assert(!State.Instance && "Interleave group being replicated.");
+  assert(!State.Lane && "Interleave group being replicated.");
   const InterleaveGroup<Instruction> *Group = IG;
   Instruction *Instr = Group->getInsertPos();
@@ -2490,9 +2482,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   // If the group is reverse, adjust the index to refer to the last vector lane
   // instead of the first. We adjust the index from the first vector lane,
   // rather than directly getting the pointer for lane VF - 1, because the
-  // pointer operand of the interleaved access is supposed to be uniform. For
-  // uniform instructions, we're only required to generate a value for the
-  // first vector lane in each unroll iteration.
+  // pointer operand of the interleaved access is supposed to be uniform.
   if (Group->isReverse()) {
     Value *RuntimeVF =
         getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
@@ -2505,7 +2495,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
     Idx = State.Builder.getInt32(-Index);
   VPValue *Addr = getAddr();
-  Value *ResAddr = State.get(Addr, VPIteration(0, 0));
+  Value *ResAddr = State.get(Addr, VPLane(0));
   if (auto *I = dyn_cast<Instruction>(ResAddr))
     State.setDebugLocFrom(I->getDebugLoc());
@@ -2805,7 +2795,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
   // A pointer induction, performed by using a gep
   BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
-  Value *ScalarStepValue = State.get(getOperand(1), VPIteration(0, 0));
+  Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
   Type *PhiType = IndDesc.getStep()->getType();
   Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
   // Add induction update using an incorrect block temporarily. The phi node
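The EVL restructuring later in this patch (VPlanTransforms.cpp) moves the AVL computation out of ExplicitVectorLength codegen and into an explicit Sub in the plan, matching the updated doc comment (%AVL = sub original TC, %EVLPhi). As a rough mental model, the generated loop control corresponds to the following sketch; it is illustrative only, with getVectorLength standing in for @llvm.experimental.get.vector.length and the clamping behavior simplified:

#include <algorithm>
#include <cstdint>

// Stand-in for @llvm.experimental.get.vector.length on a target whose
// maximum vector length is VLMax elements.
static uint32_t getVectorLength(uint64_t AVL, uint32_t VLMax) {
  return static_cast<uint32_t>(std::min<uint64_t>(AVL, VLMax));
}

static void evlLoop(uint64_t TripCount, uint32_t VLMax) {
  for (uint64_t IV = 0; IV < TripCount;) {
    uint64_t AVL = TripCount - IV;               // %avl = sub original TC, %EVLPhi
    uint32_t EVL = getVectorLength(AVL, VLMax);  // EXPLICIT-VECTOR-LENGTH %avl
    // ... EVL-predicated vector work on lanes [IV, IV + EVL) ...
    IV += EVL;                                   // %NextEVLIV = add EVL, %EVLPhi
  }
}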
@@ -2839,7 +2829,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
     StartOffset = State.Builder.CreateAdd(
         StartOffset, State.Builder.CreateStepVector(VecPhiType));
-    assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, 0)) &&
+    assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
            "scalar step must be the same across all parts");
     Value *GEP = State.Builder.CreateGEP(
         State.Builder.getInt8Ty(), NewPointerPhi,
@@ -2869,7 +2859,7 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
-  assert(!State.Instance && "cannot be used in per-lane");
+  assert(!State.Lane && "cannot be used in per-lane");
   const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
   SCEVExpander Exp(SE, DL, "induction");
@@ -2878,7 +2868,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) {
   assert(!State.ExpandedSCEVs.contains(Expr) &&
          "Same SCEV expanded multiple times");
   State.ExpandedSCEVs[Expr] = Res;
-  State.set(this, Res, {0, 0});
+  State.set(this, Res, VPLane(0));
 }
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3087,7 +3077,7 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
-  Value *Start = State.get(getOperand(0), VPIteration(0, 0));
+  Value *Start = State.get(getOperand(0), VPLane(0));
   PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
   Phi->addIncoming(Start, VectorPH);
   Phi->setDebugLoc(getDebugLoc());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b2893e8328722..a878613c4ba48 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -593,12 +593,10 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
         Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
         nullptr, StartV, StepV, Builder);
-    auto *Recipe = new VPInstruction(VPInstruction::PtrAdd,
-                                     {PtrIV->getStartValue(), Steps},
-                                     PtrIV->getDebugLoc(), "next.gep");
+    VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
+                                           PtrIV->getDebugLoc(), "next.gep");
-    Recipe->insertAfter(Steps);
-    PtrIV->replaceAllUsesWith(Recipe);
+    PtrIV->replaceAllUsesWith(PtrAdd);
     continue;
   }
@@ -1425,7 +1423,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
 /// ...
 /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
 ///                                               [ %NextEVLIV, %vector.body ]
-/// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC
+/// %AVL = sub original TC, %EVLPhi
+/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL
 /// ...
 /// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
 /// ...
@@ -1455,9 +1454,14 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
   // Create the ExplicitVectorLengthPhi recipe in the main loop.
   auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
   EVLPhi->insertAfter(CanonicalIVPHI);
-  auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
-                                  {EVLPhi, Plan.getTripCount()});
-  VPEVL->insertBefore(*Header, Header->getFirstNonPhi());
+  // TODO: Add support for MaxSafeDist for correct loop emission.
+  // Compute original TC - IV as the AVL (application vector length).
+ auto *AVL = new VPInstruction(Instruction::Sub, {Plan.getTripCount(), EVLPhi}, + DebugLoc(), "avl"); + AVL->insertBefore(*Header, Header->getFirstNonPhi()); + auto *VPEVL = + new VPInstruction(VPInstruction::ExplicitVectorLength, AVL, DebugLoc()); + VPEVL->insertAfter(AVL); auto *CanonicalIVIncrement = cast(CanonicalIVPHI->getBackedgeValue()); diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index b96fdb0109829..b3e66ccc705f8 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -14,11 +14,11 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fadd <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef @@ -81,21 +81,37 @@ define void @fadd() { } define void @fadd_f16() { -; CHECK-LABEL: 'fadd_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fadd_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fadd_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fadd half undef, undef @@ -126,11 +142,11 @@ define void @fsub() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fsub <4 x 
bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef @@ -193,21 +209,37 @@ define void @fsub() { } define void @fsub_f16() { -; CHECK-LABEL: 'fsub_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fsub_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef +; ZVFH-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fsub_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fsub half undef, undef @@ -238,11 +270,11 @@ define void @fmul() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fmul <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%NXV16BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef @@ -305,21 +337,37 @@ define void @fmul() { } define void @fmul_f16() { -; CHECK-LABEL: 'fmul_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fmul_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef +; 
ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fmul_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fmul half undef, undef @@ -350,11 +398,11 @@ define void @fdiv() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fdiv <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V2F32 = fdiv <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef @@ -417,21 +465,37 @@ define void @fdiv() { } define void @fdiv_f16() { -; CHECK-LABEL: 'fdiv_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fdiv_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fdiv_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> 
undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fdiv half undef, undef diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index 13994c46335de..2bf1e5d26e2da 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -390,3 +390,28 @@ define void @select() { ret void } + +define void @select_of_constants() { +; CHECK-LABEL: 'select_of_constants' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + ; Splat constants + select i1 undef, <2 x i64> , <2 x i64> zeroinitializer + ; LHS is a VID pattern + select i1 undef, <2 x i64> , <2 x i64> zeroinitializer + select i1 undef, <2 x i64> , <2 x i64> zeroinitializer + ; 2x general (expensive) constants + select i1 undef, <2 x i64> , <2 x i64> + + ; powers of two (still expensive) + select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer + + ret void +} + + diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 43ac246055da7..7e4a4d527fc90 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -31,10 +31,6 @@ declare float @llvm.nvvm.bitcast.i2f(i32) declare i64 @llvm.nvvm.bitcast.d2ll(double) declare double @llvm.nvvm.bitcast.ll2d(i64) -declare i32 @llvm.nvvm.rotate.b32(i32, i32) -declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) -declare i64 @llvm.nvvm.rotate.b64(i64, i32) - ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -143,16 +139,4 @@ define void @bitcast(i32 %a, i64 %b, float %c, double %d) { %r4 
= call double @llvm.nvvm.bitcast.ll2d(i64 %b) ret void -} - -; CHECK-LABEL: @rotate -define void @rotate(i32 %a, i64 %b) { -; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6) -; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7) -; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8) -; - %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6) - %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7) - %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) - ret void -} +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir index 89750c90fc1cb..bd80a892e239e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -412,12 +412,11 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir index 92f8e524dbb31..52a28ad37e362 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -307,29 +307,24 @@ body: | ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s8>), [[UV9:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>) - ; 
CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s16>), [[UV17:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV14]], [[UV16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV18]](s8), [[UV19]](s8), [[UV20]](s8), [[UV21]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>) - ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(<4 x s16>), [[UV23:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV22]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) + ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR3]] ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %w0:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll index b14f1a43b7dcf..74e4a167ae14c 100644 --- a/llvm/test/CodeGen/AArch64/bswap.ll +++ b/llvm/test/CodeGen/AArch64/bswap.ll @@ -179,8 +179,6 @@ define <2 x i16> 
@bswap_v2i16(<2 x i16> %a){ ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: rev16 v0.8b, v0.8b -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll index 5bd680ed48938..bbfec8c7c3361 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll @@ -18,11 +18,6 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h -; CHECK-GI-NEXT: mov h0, v2.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v2.h[1], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: fmov d0, d2 ; CHECK-GI-NEXT: ret %retval = call {<2 x half>, <2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half> %vec) diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index d942839c577d2..df90f9d5f0910 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -376,15 +376,15 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) { ; CHECK-GI-LABEL: fpext_v4f16_v4f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] ; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: fcvt d2, h2 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: fcvt d3, h3 -; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-NEXT: fcvt d4, h1 +; CHECK-GI-NEXT: fcvt d1, h2 +; CHECK-GI-NEXT: fcvt d2, h3 +; CHECK-GI-NEXT: mov v0.d[1], v4.d[0] +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] ; CHECK-GI-NEXT: ret entry: %c = fpext <4 x half> %a to <4 x double> @@ -392,20 +392,11 @@ entry: } define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) { -; CHECK-SD-LABEL: fpext_v2f16_v2f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fpext_v2f16_v2f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fpext_v2f16_v2f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %c = fpext <2 x half> %a to <2 x float> ret <2 x float> %c diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f72a49f6ab7c8..c436c410a4e39 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -3961,9 +3961,6 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], 
v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2d, v0.2d @@ -4008,9 +4005,6 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2d, v0.2d @@ -4207,17 +4201,17 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { ; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <4 x half> %a to <4 x i64> @@ -4273,17 +4267,17 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <4 x half> %a to <4 x i64> @@ -4369,29 +4363,29 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; 
CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <8 x half> %a to <8 x i64> @@ -4477,29 +4471,29 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <8 x half> %a to <8 x i64> @@ -5708,9 +5702,6 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) { ; ; CHECK-GI-LABEL: fptos_v2f16_v2i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -5729,9 +5720,6 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) { ; ; CHECK-GI-LABEL: fptou_v2f16_v2i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -5984,21 +5972,13 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; 
CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6017,21 +5997,13 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6460,21 +6432,13 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6493,21 +6457,13 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index ed7814938da25..2d568e858c36b 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ 
b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1270,9 +1270,6 @@ define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { ; ; CHECK-GI-LABEL: test_signed_v2f16_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -3301,17 +3298,17 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) { ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -4167,29 +4164,29 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; ; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 40a865338cd85..f63fba9dab6c6 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -1098,9 +1098,6 @@ define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x 
half> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v2f16_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -2711,17 +2708,17 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) { ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -3433,29 +3430,29 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; ; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index c0d4ddef23132..2187717c4148a 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -366,9 +366,6 @@ define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, 
v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = fptrunc <2 x float> %a to <2 x half> diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index f70ec0f35cb58..c5bde81ba4a5e 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -3313,24 +3313,17 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov h2, v1.h[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: smov x8, v0.s[0] -; CHECK-GI-NEXT: smov x9, v0.s[1] -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: smov x8, v1.s[0] -; CHECK-GI-NEXT: mov v0.d[1], x9 -; CHECK-GI-NEXT: smov x9, v1.s[1] +; CHECK-GI-NEXT: smov x8, v0.h[0] +; CHECK-GI-NEXT: smov x9, v0.h[1] ; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: smov x8, v0.h[2] ; CHECK-GI-NEXT: mov v1.d[1], x9 -; CHECK-GI-NEXT: scvtf v0.2d, v0.2d -; CHECK-GI-NEXT: scvtf v2.2d, v1.2d +; CHECK-GI-NEXT: smov x9, v0.h[3] +; CHECK-GI-NEXT: mov v2.d[0], x8 +; CHECK-GI-NEXT: scvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: scvtf v2.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret @@ -3365,24 +3358,17 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov h2, v1.h[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: mov w8, v0.s[0] -; CHECK-GI-NEXT: mov w9, v0.s[1] -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: mov w8, v1.s[0] -; CHECK-GI-NEXT: mov v0.d[1], x9 -; CHECK-GI-NEXT: mov w9, v1.s[1] +; CHECK-GI-NEXT: umov w8, v0.h[0] +; CHECK-GI-NEXT: umov w9, v0.h[1] ; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: umov w8, v0.h[2] ; CHECK-GI-NEXT: mov v1.d[1], x9 -; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d -; CHECK-GI-NEXT: ucvtf v2.2d, v1.2d +; CHECK-GI-NEXT: umov w9, v0.h[3] +; CHECK-GI-NEXT: mov v2.d[0], x8 +; CHECK-GI-NEXT: ucvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret @@ -5267,11 +5253,8 @@ define <3 x float> @stofp_v3i8_v3f32(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] @@ -5300,11 +5283,8 @@ define <3 x float> @utofp_v3i8_v3f32(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: 
mov v0.h[2], w2 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] @@ -6222,9 +6202,6 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i64_v2f16: @@ -6271,9 +6248,6 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i64_v2f16: @@ -7210,9 +7184,6 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = sitofp <2 x i32> %a to <2 x half> @@ -7233,9 +7204,6 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = uitofp <2 x i32> %a to <2 x half> @@ -7443,18 +7411,12 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i16_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <2 x i16> %a to <2 x half> @@ -7484,18 +7446,12 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i16_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; 
CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <2 x i16> %a to <2 x half> @@ -7977,9 +7933,6 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16: @@ -7990,12 +7943,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s ; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <2 x i8> %a to <2 x half> @@ -8039,27 +7987,14 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-FP16-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff -; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <2 x i8> %a to <2 x half> @@ -8096,11 +8031,8 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 ; CHECK-GI-NOFP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NOFP16-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NOFP16-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NOFP16-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] @@ -8149,11 +8081,8 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 ; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NOFP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NOFP16-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] diff --git 
a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll index 2897741780f60..7b85924ce1e32 100644 --- a/llvm/test/CodeGen/AArch64/neon-perm.ll +++ b/llvm/test/CodeGen/AArch64/neon-perm.ll @@ -1739,15 +1739,7 @@ define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) { ; ; CHECK-GI-LABEL: test_vzip1_v4i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index adac75758220e..6d331d9413f91 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -190,23 +185,39 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-NEXT: add x8, x0, #1 -; CHECK-NEXT: add x9, x1, #1 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: add x9, x1, #1 +; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; 
CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -256,10 +267,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll index 54f7887aee8d3..066928687cc02 100644 --- a/llvm/test/CodeGen/AArch64/shift.ll +++ b/llvm/test/CodeGen/AArch64/shift.ll @@ -534,14 +534,7 @@ define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = shl <4 x i8> %0, %1 @@ -577,8 +570,6 @@ define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -723,14 +714,7 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v1.8b ; CHECK-GI-NEXT: sshl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = ashr <4 x i8> %0, %1 @@ -766,8 +750,6 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: sshl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -906,14 +888,7 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v1.8b ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = lshr <4 x 
i8> %0, %1 @@ -948,8 +923,6 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 5f4ff1e64673b..6b5951551c3a5 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -2,9 +2,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes - ; ===== Legal Vector Types ===== define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { @@ -183,13 +180,30 @@ define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) { ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ -; CHECK-LABEL: shufflevector_v2i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mov v0.s[1], v1.s[1] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v2i1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: mov v0.s[1], v1.s[1] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov w8, v1.s[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov w9, v0.s[1] +; CHECK-GI-NEXT: mov v1.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[1], w9 +; CHECK-GI-NEXT: mov b1, v1.b[1] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: umov w8, v0.b[0] +; CHECK-GI-NEXT: umov w9, v0.b[1] +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> ret <2 x i1> %c } @@ -358,11 +372,24 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { ; ===== Smaller/Larger Width Vectors with Zero Masks ===== define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){ -; CHECK-LABEL: shufflevector_v2i1_zeroes: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: dup v0.2s, v0.s[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v2i1_zeroes: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: dup v0.2s, v0.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i1_zeroes: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] +; CHECK-GI-NEXT: umov w8, v0.b[0] +; CHECK-GI-NEXT: umov w9, v0.b[1] +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov 
v0.s[1], w9 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> ret <2 x i1> %c } @@ -486,11 +513,9 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 -; CHECK-GI-NEXT: fmov w2, s2 +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c @@ -598,11 +623,9 @@ define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: dup v0.8b, v0.b[0] -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 -; CHECK-GI-NEXT: fmov w2, s2 +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll new file mode 100644 index 0000000000000..591fe8da6b79c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+fp8 -force-streaming -verify-machineinstrs < %s | FileCheck %s + +; FSCALE (Single, x2) + +define { , } @multi_vec_scale_single_x2_half( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_scale_single_x2_float( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fscale { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_scale_single_x2_double( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fscale { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; FSCALE (Single, x4) + +define { , , , } @multi_vec_scale_single_x4_half( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 
killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) + ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res +} + +define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x4_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fscale { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) + ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res +} + +define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x4_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fscale { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) + ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res +} + +; FSCALE (Multi, x2) +define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) { +; CHECK-LABEL: multi_vec_scale_x2_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) + ret { <vscale x 8 x half>, <vscale x 8 x half> } %res +} + +define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2 ) { +; CHECK-LABEL: multi_vec_scale_x2_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fscale { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) + ret { <vscale x 4 x float>, <vscale x 4 x float> } %res +} + +define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) { +; CHECK-LABEL: multi_vec_scale_x2_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fscale { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) + ret { <vscale x 2 x double>, <vscale x 2 x double> } %res +} + +; FSCALE (Multi, x4) 
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x4_half(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) + ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res +} + +define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x4_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fscale { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) + ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res +} + +define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x4_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fscale { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) + ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res +} + +declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare { <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) +declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) +declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 12371ef2c0021..dddda7e9ba64c 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -191,23 +186,39 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-NEXT: add x8, x0, #1 -; CHECK-NEXT: add x9, x1, #1 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: add x9, x1, #1 +; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -257,10 +268,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: 
str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index e99935e8677fc..badd31c1c561c 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -187,24 +182,40 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: movi d2, #0x0000ff000000ff -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff +; CHECK-SD-NEXT: ldrb w10, [x0, #1] +; CHECK-SD-NEXT: ldrb w11, [x1, #1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -255,10 +266,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git 
a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index cdba9625431a5..45418b5c648fa 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -188,22 +183,38 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: ldrb w10, [x0, #1] +; CHECK-SD-NEXT: ldrb w11, [x1, #1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -252,10 +263,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 3b456ed248b3a..8300b2bc05e96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -1151,14 +1151,13 @@ body: 
| ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>), implicit [[CONCAT_VECTORS1]](<4 x s16>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -1250,14 +1249,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC2]](s32), [[TRUNC3]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x 
s32>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -1300,21 +1298,20 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index 10cbc56cc5fbe..de973481f8230 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -187,7 +187,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ; kill: killed $vgpr4 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 837f65d4bdec6..bec5f646b7839 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -171,9 +171,11 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192) - ; 
GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64) + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -292,11 +294,11 @@ body: | ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 31f28b50462b7..f2a88a21a286e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -7418,9 +7418,8 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7440,7 +7439,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7456,7 +7455,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7472,7 +7471,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7488,8 +7487,8 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7504,7 +7503,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7519,7 +7518,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY 
[[UV14]](s32) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7534,7 +7533,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7565,9 +7564,8 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7588,7 +7586,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) @@ -7605,7 +7603,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) @@ -7622,7 +7620,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -7639,8 +7637,8 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -7656,7 +7654,7 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) @@ -7672,7 +7670,7 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) @@ -7688,7 +7686,7 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -7730,9 +7728,8 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7741,47 +7738,47 @@ body: | ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD9]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -7803,9 +7800,8 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7814,47 +7810,47 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, 
addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -8107,9 +8103,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8129,7 +8124,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8145,7 +8140,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8161,7 +8156,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8177,8 +8172,8 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8193,7 +8188,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), 
[[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8208,7 +8203,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8223,7 +8218,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8256,9 +8251,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8279,7 +8273,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) @@ -8296,7 +8290,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) @@ -8313,7 +8307,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -8330,8 +8324,8 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -8347,7 +8341,7 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) @@ -8363,7 +8357,7 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) @@ -8379,7 +8373,7 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) 
= COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -8423,9 +8417,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8434,47 +8427,47 @@ body: | ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -8498,9 +8491,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8509,47 +8501,47 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index 5205386c8ea71..282550830442c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -151,21 +151,19 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll new file mode 100644 index 0000000000000..9319f0d3f5d40 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll @@ -0,0 +1,365 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX1010 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX1030 %s + +define void 
@uniform_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_no_metadata: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB0_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB0_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_no_metadata: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB0_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB0_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @uniform_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_unprofitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB1_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB1_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_unprofitable: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB1_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB1_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @uniform_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, 
i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_profitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB2_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_profitable: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB2_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_no_metadata: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB3_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB3_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_no_metadata: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB3_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB3_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_no_metadata: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: 
s_cbranch_execz .LBB3_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB3_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_unprofitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB4_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB4_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_unprofitable: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB4_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB4_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_unprofitable: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB4_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB4_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void 
@llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_profitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB5_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB5_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_profitable: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB5_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB5_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_profitable: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB5_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB5_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) +declare void @llvm.amdgcn.s.waitcnt(i32) +declare i32 @llvm.amdgcn.workitem.id.x() + +!0 = !{!"branch_weights", i32 1000, i32 1000} +!1 = !{!"branch_weights", i32 2000, i32 1} diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index 51f9cf73488ee..67a084068941a 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -778,8 +778,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_7 ; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then -; 
GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -824,8 +824,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_6 ; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2 -; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -1242,10 +1242,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_7 ; GCN-O0-NEXT: .LBB5_6: ; %Flow @@ -1263,10 +1266,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_5 ; GCN-O0-NEXT: .LBB5_7: ; %bb10 @@ -1336,10 +1342,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz 
.LBB5_1 @@ -1356,9 +1365,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(2) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 712cecff40617..b541be9f5aa44 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -570,21 +570,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 @@ -599,9 +599,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -663,9 +663,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte 
Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -1718,17 +1718,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_5 ; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2 @@ -1743,11 +1747,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_9 ; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit @@ -1822,11 +1828,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; 
GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -2787,21 +2795,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 @@ -2816,9 +2824,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2880,9 +2888,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, 
off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -3846,17 +3854,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_5 ; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2 @@ -3871,11 +3883,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_9 ; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit @@ -3950,11 +3964,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte 
Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index b4fe112438b4f..60946956547a7 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -4111,7 +4111,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4137,7 +4136,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload @@ -4146,12 +4144,19 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload @@ -4178,7 +4183,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 
0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4597,7 +4601,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4623,7 +4626,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload @@ -4632,12 +4634,19 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload @@ -4664,7 +4673,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -5912,7 +5920,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 7 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 8 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: 
buffer_load_dword v1, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload @@ -5921,12 +5928,19 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload @@ -5953,7 +5967,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill @@ -6041,7 +6054,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 11 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 12 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload @@ -6050,12 +6062,19 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; 
NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload @@ -6082,7 +6101,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill @@ -9175,7 +9193,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload @@ -9184,12 +9201,19 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload @@ -9216,7 +9240,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; 
NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill @@ -9641,7 +9664,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill @@ -9667,7 +9689,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 9 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 10 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload @@ -9676,12 +9697,19 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[16:19], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload @@ -9708,7 +9736,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll index a4be9ed8c2b4a..4fb28b392c9ea 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll @@ -768,17 +768,19 @@ define void @test5_s_barrier_init_m0(i32 %arg1 ,i32 %arg2) { } define amdgpu_kernel void @test1_s_barrier_join(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test1_s_barrier_join: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_barrier_join -1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_barrier_join -1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -810,17 +812,19 @@ entry: } define amdgpu_kernel void @test2_s_barrier_join(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test2_s_barrier_join: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_barrier_join 1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_barrier_join 1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -852,17 +856,19 @@ entry: } define amdgpu_kernel void @test3_s_barrier_join(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test3_s_barrier_join: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_barrier_join 0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_barrier_join 0 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -967,6 +973,20 @@ define void @test5_s_barrier_join_m0(i32 %arg) { ret void } +define void @test6_s_barrier_join_0() { +; GFX12-LABEL: test6_s_barrier_join_0: +; GFX12: ; %bb.0: +; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_barrier_join 0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + call void @llvm.amdgcn.s.barrier.join(i32 0) + ret void +} + define amdgpu_kernel void @test1_s_barrier_leave(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 { ; GFX12-SDAG-LABEL: test1_s_barrier_leave: ; GFX12-SDAG: ; %bb.0: ; %entry @@ -1026,17 +1046,19 @@ entry: } define amdgpu_kernel void @test1_s_wakeup_barrier(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test1_s_wakeup_barrier: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_wakeup_barrier -1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_wakeup_barrier -1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -1068,17 +1090,19 @@ entry: } define amdgpu_kernel void @test2_s_wakeup_barrier(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test2_s_wakeup_barrier: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_wakeup_barrier 1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_wakeup_barrier 1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -1110,17 +1134,19 @@ entry: } define amdgpu_kernel void @test3_s_wakeup_barrier(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test3_s_wakeup_barrier: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_wakeup_barrier 0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_wakeup_barrier 
0 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -1226,34 +1252,21 @@ define void @test5_s_wakeup_barrier_m0(i32 %arg) { } define amdgpu_kernel void @test1_s_get_barrier_state(ptr addrspace(1) %out) #0 { -; GFX12-SDAG-LABEL: test1_s_get_barrier_state: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_get_barrier_state s4, -1 -; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-SDAG-NEXT: s_nop 0 -; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: test1_s_get_barrier_state: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_get_barrier_state s2, -1 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_nop 0 -; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: test1_s_get_barrier_state: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_get_barrier_state s2, -1 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp @@ -1264,34 +1277,21 @@ entry: } define amdgpu_kernel void @test2_s_get_barrier_state(ptr addrspace(1) %out) #0 { -; GFX12-SDAG-LABEL: test2_s_get_barrier_state: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_get_barrier_state s4, 1 -; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-SDAG-NEXT: s_nop 0 -; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: test2_s_get_barrier_state: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; 
GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_get_barrier_state s2, 1 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_nop 0 -; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: test2_s_get_barrier_state: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_get_barrier_state s2, 1 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp @@ -1302,34 +1302,21 @@ entry: } define amdgpu_kernel void @test3_s_get_barrier_state(ptr addrspace(1) %out) #0 { -; GFX12-SDAG-LABEL: test3_s_get_barrier_state: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_get_barrier_state s4, 0 -; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-SDAG-NEXT: s_nop 0 -; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: test3_s_get_barrier_state: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_get_barrier_state s2, 0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_nop 0 -; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: test3_s_get_barrier_state: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_get_barrier_state s2, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp @@ -1401,6 +1388,24 @@ define i32 @test5_s_get_barrier_state_m0(i32 %arg) { ret i32 %state } +define i32 @test6_s_get_barrier_state_0() { +; GFX12-LABEL: test6_s_get_barrier_state_0: +; GFX12: ; %bb.0: +; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_get_barrier_state s0, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %state = call i32 @llvm.amdgcn.s.get.barrier.state(i32 0) + ret i32 %state +} + define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 { ; GFX12-SDAG-LABEL: test_barrier_convert: ; GFX12-SDAG: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index fe5427048e8cf..e0c2d00891250 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -3569,7 +3569,6 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 @@ -3577,7 +3576,6 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 @@ -4382,7 +4380,6 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 @@ -7350,12 +7347,15 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_store_dword v13, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v14, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(3) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; 
GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39 @@ -7378,7 +7378,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll index 5ae2b91bdb3e7..4d7f1a9663c3d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll @@ -3098,7 +3098,6 @@ define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %ou ; SI-NOHSA-NEXT: buffer_load_dword v8, off, s[12:15], 0 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: buffer_load_dword v9, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: buffer_load_dword v10, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) ; SI-NOHSA-NEXT: buffer_load_dword v11, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) ; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll index 85d342bf303c0..c302233e748fd 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -244,7 +244,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index 42ed4c1f2e63d..dd6fd5aa384f6 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -258,7 +258,6 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir new file mode 100644 
index 0000000000000..14617e066f954 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s +# +# In the example, the `$sgpr4 = COPY $sgpr2` was incorrectly sunk into bb.3. This happened because we did not update +# register uses when we found that `$sgpr2 = COPY $sgpr3` should not be sunk because of a conflict with the successor's +# prologue instructions. +--- +name: update_dependency_correctly +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: update_dependency_correctly + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr3, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr2 + ; CHECK-NEXT: renamable $sgpr2 = COPY $sgpr3 + ; CHECK-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, $vgpr1 + ; CHECK-NEXT: $sgpr1 = S_AND_SAVEEXEC_B32 $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr2, $sgpr4, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0 + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo + ; CHECK-NEXT: renamable $sgpr5 = COPY $sgpr1 + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 32, 0 + ; CHECK-NEXT: S_BRANCH %bb.1 + bb.0: + successors: %bb.3(0x40000000), %bb.2(0x40000000) + liveins: $sgpr0, $sgpr3, $sgpr2 + + $vgpr1 = IMPLICIT_DEF + + renamable $sgpr4 = COPY $sgpr2 + renamable $sgpr2 = COPY $sgpr3 + + $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, $vgpr1 + + $sgpr1 = S_AND_SAVEEXEC_B32 $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + S_ENDPGM 0 + + bb.3: + successors: %bb.2(0x40000000) + liveins: $sgpr0, $sgpr2, $sgpr4, $vgpr1 + + $sgpr3 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0 + + renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo + renamable $sgpr5 = COPY $sgpr1 + renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 32, 0 + + S_BRANCH %bb.2 + +...
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index fd6e06afc67da..19cc60963e900 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -609,21 +609,21 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 @@ -638,9 +638,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -702,9 +702,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2007,21 +2007,21 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 @@ -2036,9 +2036,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2100,9 +2100,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-wait.mir b/llvm/test/CodeGen/AMDGPU/spill-wait.mir index 8e896252af89b..6983a2742a41c 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-wait.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-wait.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | 
FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s # There shall be no S_WAITCNT between two stores. @@ -10,14 +11,27 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 - ; GCN-LABEL: name: spill_vgpr_tuple - ; GCN: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: spill_vgpr_tuple + ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: spill_vgpr_tuple + ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 + ; GFX12-NEXT: S_ENDPGM 0 $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 @@ -33,14 +47,27 @@ body: | bb.0: liveins: $vgpr0, $sgpr10_sgpr11 - ; GCN-LABEL: name: load_vcc_wait - ; GCN: liveins: $vgpr0, $sgpr10_sgpr11 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_vcc_wait + ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_vcc_wait + ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; 
GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec S_ENDPGM 0 @@ -55,14 +82,27 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 - ; GCN-LABEL: name: load_flat_scr_lo_flat_load_wait - ; GCN: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait + ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait + ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -75,15 +115,120 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 - ; GCN-LABEL: name: load_flat_scr_lo_scratch_store_wait - ; GCN: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait + ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait + ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
+ +# Check that implicit spill defs do not force wait to zero on the first store + +--- +name: spill_load_store + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + + ; GFX9-LABEL: name: spill_load_store + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: spill_load_store + ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_WAIT_LOADCNT 3 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_WAIT_LOADCNT 2 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + ; GFX12-NEXT: S_WAIT_LOADCNT 1 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + ; GFX12-NEXT: S_WAIT_LOADCNT 0 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3 + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + S_ENDPGM 0 +... + +# Make sure we have a wait to mitigate WAW on gfx12 + +--- +name: scratch_load_waw +body: | + bb.0.entry: + liveins: $vgpr0, $sgpr0 + + ; GFX9-LABEL: name: scratch_load_waw + ; GFX9: liveins: $vgpr0, $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: scratch_load_waw + ; GFX12: liveins: $vgpr0, $sgpr0 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_WAIT_LOADCNT 0 + ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +...
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir index d69cb448b95de..7a807260d142d 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir @@ -321,8 +321,8 @@ body: | ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 { ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: } - ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 { + ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: } BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 { diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir index 4051be18dd49f..8528de77533bf 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -301,8 +301,8 @@ body: | # CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 { # CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr # CHECK-NEXT: } -# CHECK-NEXT: S_WAITCNT 112 # CHECK-NEXT: BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 { +# CHECK-NEXT: S_WAITCNT 112 # CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr # CHECK-NEXT: } diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll index d5aaf3e6f30bd..0ef725fc91b54 100644 --- a/llvm/test/CodeGen/ARM/vbsl.ll +++ b/llvm/test/CodeGen/ARM/vbsl.ll @@ -264,8 +264,7 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: same_param_all: ; CHECK: @ %bb.0: -; CHECK-NEXT: vorr d0, d1, d1 -; CHECK-NEXT: vbsl d0, d1, d1 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b) ret <8 x i8> %vbsl.i @@ -274,7 +273,7 @@ define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) { define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: same_param_12: ; CHECK: @ %bb.0: -; CHECK-NEXT: vbsl d0, d1, d1 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b) ret <8 x i8> %vbsl.i diff --git a/llvm/test/CodeGen/BPF/BTF/atomics.ll b/llvm/test/CodeGen/BPF/BTF/atomics.ll new file mode 100644 index 0000000000000..2c02110f24c0d --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/atomics.ll @@ -0,0 +1,151 @@ +; RUN: llc -march=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK %s +; +; Source: +; #include <stdatomic.h> +; struct gstruct_t { +; _Atomic int a; +; } gstruct; +; extern _Atomic int ext; +; _Atomic int gbl; +; _Atomic int *pgbl; +; volatile _Atomic int vvar; +; _Atomic int __attribute__((btf_type_tag("foo"))) *tagptr1; +; volatile __attribute__((btf_type_tag("foo"))) _Atomic int *tagptr2; +; _Atomic int foo(_Atomic int a1, _Atomic int *p1) { +; (void)__c11_atomic_fetch_add(&gstruct.a, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(&ext, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(&gbl, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(pgbl, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(&vvar, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(p1, 1, memory_order_relaxed); +; +; return a1; +; } + +target
triple = "bpf" + +%struct.gstruct_t = type { i32 } + +@gstruct = dso_local global %struct.gstruct_t zeroinitializer, align 4, !dbg !0 +@ext = external dso_local global i32, align 4, !dbg !34 +@gbl = dso_local global i32 0, align 4, !dbg !16 +@pgbl = dso_local local_unnamed_addr global ptr null, align 8, !dbg !20 +@vvar = dso_local global i32 0, align 4, !dbg !23 +@tagptr1 = dso_local local_unnamed_addr global ptr null, align 8, !dbg !26 +@tagptr2 = dso_local local_unnamed_addr global ptr null, align 8, !dbg !31 + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn +define dso_local i32 @foo(i32 returned %a1, ptr nocapture noundef %p1) local_unnamed_addr #0 !dbg !45 { +entry: + #dbg_value(i32 %a1, !49, !DIExpression(), !51) + #dbg_value(ptr %p1, !50, !DIExpression(), !51) + %0 = atomicrmw add ptr @gstruct, i32 1 monotonic, align 4, !dbg !52 + %1 = atomicrmw add ptr @ext, i32 1 monotonic, align 4, !dbg !53 + %2 = atomicrmw add ptr @gbl, i32 1 monotonic, align 4, !dbg !54 + %3 = load ptr, ptr @pgbl, align 8, !dbg !55, !tbaa !56 + %4 = atomicrmw add ptr %3, i32 1 monotonic, align 4, !dbg !60 + %5 = atomicrmw volatile add ptr @vvar, i32 1 monotonic, align 4, !dbg !61 + %6 = atomicrmw add ptr %p1, i32 1 monotonic, align 4, !dbg !62 + ret i32 %a1, !dbg !63 +} + +; CHECK: [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-NEXT: [2] PTR '(anon)' type_id=1 +; CHECK-NEXT: [3] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2 +; CHECK-NEXT: 'a1' type_id=1 +; CHECK-NEXT: 'p1' type_id=2 +; CHECK-NEXT: [4] FUNC 'foo' type_id=3 linkage=global +; CHECK-NEXT: [5] STRUCT 'gstruct_t' size=4 vlen=1 +; CHECK-NEXT: 'a' type_id=1 bits_offset=0 +; CHECK-NEXT: [6] VAR 'gstruct' type_id=5, linkage=global +; CHECK-NEXT: [7] VAR 'ext' type_id=1, linkage=extern +; CHECK-NEXT: [8] VAR 'gbl' type_id=1, linkage=global +; CHECK-NEXT: [9] VAR 'pgbl' type_id=2, linkage=global +; CHECK-NEXT: [10] VOLATILE '(anon)' type_id=1 +; CHECK-NEXT: [11] VAR 'vvar' type_id=10, linkage=global +; CHECK-NEXT: [12] TYPE_TAG 'foo' type_id=1 +; CHECK-NEXT: [13] PTR '(anon)' type_id=12 +; CHECK-NEXT: [14] VAR 'tagptr1' type_id=13, linkage=global +; CHECK-NEXT: [15] TYPE_TAG 'foo' type_id=10 +; CHECK-NEXT: [16] PTR '(anon)' type_id=15 +; CHECK-NEXT: [17] VAR 'tagptr2' type_id=16, linkage=global +; CHECK-NEXT: [18] DATASEC '.bss' size=0 vlen=6 +; CHECK-NEXT: type_id=6 offset=0 size=4 +; CHECK-NEXT: type_id=8 offset=0 size=4 +; CHECK-NEXT: type_id=9 offset=0 size=8 +; CHECK-NEXT: type_id=11 offset=0 size=4 +; CHECK-NEXT: type_id=14 offset=0 size=8 +; CHECK-NEXT: type_id=17 offset=0 size=8 + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!39, !40, !41, !42, !43} +!llvm.ident = !{!44} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "gstruct", scope: !2, file: !3, line: 4, type: !36, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 96b5b6e527c024bea84f07ea11d4b3ff63468c22)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !15, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test6.c", directory: "/tmp/home/yhs/tmp3", checksumkind: CSK_MD5, checksum: "e743f2985da6027dcc5e048bd1dcccca") +!4 = !{!5} +!5 = !DICompositeType(tag: 
DW_TAG_enumeration_type, name: "memory_order", file: !6, line: 68, baseType: !7, size: 32, elements: !8) +!6 = !DIFile(filename: "work/yhs/llvm-project/llvm/build/install/lib/clang/20/include/stdatomic.h", directory: "/home/yhs", checksumkind: CSK_MD5, checksum: "f17199a988fe91afffaf0f943ef87096") +!7 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!8 = !{!9, !10, !11, !12, !13, !14} +!9 = !DIEnumerator(name: "memory_order_relaxed", value: 0) +!10 = !DIEnumerator(name: "memory_order_consume", value: 1) +!11 = !DIEnumerator(name: "memory_order_acquire", value: 2) +!12 = !DIEnumerator(name: "memory_order_release", value: 3) +!13 = !DIEnumerator(name: "memory_order_acq_rel", value: 4) +!14 = !DIEnumerator(name: "memory_order_seq_cst", value: 5) +!15 = !{!0, !16, !20, !23, !26, !31, !34} +!16 = !DIGlobalVariableExpression(var: !17, expr: !DIExpression()) +!17 = distinct !DIGlobalVariable(name: "gbl", scope: !2, file: !3, line: 6, type: !18, isLocal: false, isDefinition: true) +!18 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !19) +!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!20 = !DIGlobalVariableExpression(var: !21, expr: !DIExpression()) +!21 = distinct !DIGlobalVariable(name: "pgbl", scope: !2, file: !3, line: 7, type: !22, isLocal: false, isDefinition: true) +!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64) +!23 = !DIGlobalVariableExpression(var: !24, expr: !DIExpression()) +!24 = distinct !DIGlobalVariable(name: "vvar", scope: !2, file: !3, line: 8, type: !25, isLocal: false, isDefinition: true) +!25 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !18) +!26 = !DIGlobalVariableExpression(var: !27, expr: !DIExpression()) +!27 = distinct !DIGlobalVariable(name: "tagptr1", scope: !2, file: !3, line: 9, type: !28, isLocal: false, isDefinition: true) +!28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64, annotations: !29) +!29 = !{!30} +!30 = !{!"btf_type_tag", !"foo"} +!31 = !DIGlobalVariableExpression(var: !32, expr: !DIExpression()) +!32 = distinct !DIGlobalVariable(name: "tagptr2", scope: !2, file: !3, line: 10, type: !33, isLocal: false, isDefinition: true) +!33 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64, annotations: !29) +!34 = !DIGlobalVariableExpression(var: !35, expr: !DIExpression()) +!35 = distinct !DIGlobalVariable(name: "ext", scope: !2, file: !3, line: 5, type: !18, isLocal: false, isDefinition: false) +!36 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "gstruct_t", file: !3, line: 2, size: 32, elements: !37) +!37 = !{!38} +!38 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !36, file: !3, line: 3, baseType: !18, size: 32) +!39 = !{i32 7, !"Dwarf Version", i32 5} +!40 = !{i32 2, !"Debug Info Version", i32 3} +!41 = !{i32 1, !"wchar_size", i32 4} +!42 = !{i32 7, !"frame-pointer", i32 2} +!43 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!44 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 96b5b6e527c024bea84f07ea11d4b3ff63468c22)"} +!45 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 11, type: !46, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !48) +!46 = !DISubroutineType(types: !47) +!47 = !{!18, !18, !22} +!48 = !{!49, !50} +!49 = !DILocalVariable(name: "a1", arg: 1, scope: !45, file: !3, line: 11, type: !18) +!50 = !DILocalVariable(name: "p1", arg: 2, scope: !45, file: !3, line: 11, 
type: !22)
+!51 = !DILocation(line: 0, scope: !45)
+!52 = !DILocation(line: 12, column: 9, scope: !45)
+!53 = !DILocation(line: 13, column: 9, scope: !45)
+!54 = !DILocation(line: 14, column: 9, scope: !45)
+!55 = !DILocation(line: 15, column: 32, scope: !45)
+!56 = !{!57, !57, i64 0}
+!57 = !{!"any pointer", !58, i64 0}
+!58 = !{!"omnipotent char", !59, i64 0}
+!59 = !{!"Simple C/C++ TBAA"}
+!60 = !DILocation(line: 15, column: 9, scope: !45)
+!61 = !DILocation(line: 16, column: 9, scope: !45)
+!62 = !DILocation(line: 17, column: 9, scope: !45)
+!63 = !DILocation(line: 19, column: 3, scope: !45)
diff --git a/llvm/test/CodeGen/BPF/BTF/print_btf.py b/llvm/test/CodeGen/BPF/BTF/print_btf.py
new file mode 100644
index 0000000000000..6ce08b76c363e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/BTF/print_btf.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+
+# Ad-hoc script to print a BTF file in a readable format.
+# Follows the same printing conventions as bpftool with format 'raw'.
+# Usage:
+#
+#   ./print_btf.py <btf_file>
+#
+# Parameters:
+#
+#   <btf_file> :: a file name or '-' to read from stdin.
+#
+# Intended usage:
+#
+#   llvm-objcopy --dump-section .BTF=- <object_file> | ./print_btf.py -
+#
+# Kernel documentation contains a detailed format description:
+#   https://www.kernel.org/doc/html/latest/bpf/btf.html
+
+import struct
+import ctypes
+import sys
+
+
+class SafeDict(dict):
+    def __getitem__(self, key):
+        try:
+            return dict.__getitem__(self, key)
+        except KeyError:
+            return f"<unknown:{key}>"
+
+
+KINDS = SafeDict(
+    {
+        0: "UNKN",
+        1: "INT",
+        2: "PTR",
+        3: "ARRAY",
+        4: "STRUCT",
+        5: "UNION",
+        6: "ENUM",
+        7: "FWD",
+        8: "TYPEDEF",
+        9: "VOLATILE",
+        10: "CONST",
+        11: "RESTRICT",
+        12: "FUNC",
+        13: "FUNC_PROTO",
+        14: "VAR",
+        15: "DATASEC",
+        16: "FLOAT",
+        17: "DECL_TAG",
+        18: "TYPE_TAG",
+        19: "ENUM64",
+    }
+)
+
+INT_ENCODING = SafeDict(
+    {0 << 0: "(none)", 1 << 0: "SIGNED", 1 << 1: "CHAR", 1 << 2: "BOOL"}
+)
+
+ENUM_ENCODING = SafeDict({0: "UNSIGNED", 1: "SIGNED"})
+
+FUNC_LINKAGE = SafeDict({0: "static", 1: "global", 2: "extern"})
+
+VAR_LINKAGE = SafeDict({0: "static", 1: "global", 2: "extern"})
+
+FWD_KIND = SafeDict(
+    {
+        0: "struct",
+        1: "union",
+    }
+)
+
+for val, name in KINDS.items():
+    globals()["BTF_KIND_" + name] = val
+
+
+def warn(message):
+    print(message, file=sys.stderr)
+
+
+def print_btf(filename):
+    if filename == "-":
+        buf = sys.stdin.buffer.read()
+    else:
+        with open(filename, "rb") as file:
+            buf = file.read()
+
+    fmt_cache = {}
+    endian_pfx = ""
+    off = 0
+
+    def unpack(fmt):
+        nonlocal off, endian_pfx
+        fmt = endian_pfx + fmt
+        if fmt not in fmt_cache:
+            fmt_cache[fmt] = struct.Struct(fmt)
+        st = fmt_cache[fmt]
+        r = st.unpack_from(buf, off)
+        off += st.size
+        return r
+
+    # Use the magic number at the header start to determine endianness
+    (magic,) = unpack("H")
+    if magic == 0xEB9F:
+        endian_pfx = "<"
+    elif magic == 0x9FEB:
+        endian_pfx = ">"
+    else:
+        warn(f"Unexpected BTF magic: {magic:02x}")
+        return
+
+    # Rest of the header
+    version, flags, hdr_len = unpack("BBI")
+    type_off, type_len, str_off, str_len = unpack("IIII")
+
+    # Offsets in the header are relative to the end of the header
+    type_off += hdr_len
+    str_off += hdr_len
+    off = hdr_len
+    type_end = type_off + type_len
+
+    def string(rel_off):
+        try:
+            start = str_off + rel_off
+            end = buf.index(b"\0", start)
+            if start == end:
+                return "(anon)"
+            return buf[start:end].decode("utf8")
+        except ValueError as e:
+            warn(f"Can't get string at offset {str_off} + {rel_off}: {e}")
+            return f"<bad string at {rel_off}>"
+
+    idx = 1
+    while off < type_end:
+        name_off, info, size = unpack("III")
unpack("III") + kind = (info >> 24) & 0x1F + vlen = info & 0xFFFF + kflag = info >> 31 + kind_name = KINDS[kind] + name = string(name_off) + + def warn_nonzero(val, name): + nonlocal idx + if val != 0: + warn(f"<{idx}> {name} should be 0 but is {val}") + + if kind == BTF_KIND_INT: + (info,) = unpack("I") + encoding = (info & 0x0F000000) >> 24 + offset = (info & 0x00FF0000) >> 16 + bits = info & 0x000000FF + enc_name = INT_ENCODING[encoding] + print( + f"[{idx}] {kind_name} '{name}' size={size} " + f"bits_offset={offset} " + f"nr_bits={bits} encoding={enc_name}" + ) + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind in [ + BTF_KIND_PTR, + BTF_KIND_CONST, + BTF_KIND_VOLATILE, + BTF_KIND_RESTRICT, + ]: + print(f"[{idx}] {kind_name} '{name}' type_id={size}") + warn_nonzero(name_off, "name_off") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind == BTF_KIND_ARRAY: + warn_nonzero(name_off, "name_off") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + warn_nonzero(size, "size") + type, index_type, nelems = unpack("III") + print( + f"[{idx}] {kind_name} '{name}' type_id={type} " + f"index_type_id={index_type} nr_elems={nelems}" + ) + + elif kind in [BTF_KIND_STRUCT, BTF_KIND_UNION]: + print(f"[{idx}] {kind_name} '{name}' size={size} vlen={vlen}") + if kflag not in [0, 1]: + warn(f"<{idx}> kflag should 0 or 1: {kflag}") + for _ in range(0, vlen): + name_off, type, offset = unpack("III") + if kflag == 0: + print( + f"\t'{string(name_off)}' type_id={type} " + f"bits_offset={offset}" + ) + else: + bits_offset = offset & 0xFFFFFF + bitfield_size = offset >> 24 + print( + f"\t'{string(name_off)}' type_id={type} " + f"bits_offset={bits_offset} " + f"bitfield_size={bitfield_size}" + ) + + elif kind == BTF_KIND_ENUM: + encoding = ENUM_ENCODING[kflag] + print( + f"[{idx}] {kind_name} '{name}' encoding={encoding} " + f"size={size} vlen={vlen}" + ) + for _ in range(0, vlen): + (name_off,) = unpack("I") + (val,) = unpack("i" if kflag == 1 else "I") + print(f"\t'{string(name_off)}' val={val}") + + elif kind == BTF_KIND_ENUM64: + encoding = ENUM_ENCODING[kflag] + print( + f"[{idx}] {kind_name} '{name}' encoding={encoding} " + f"size={size} vlen={vlen}" + ) + for _ in range(0, vlen): + name_off, lo, hi = unpack("III") + val = hi << 32 | lo + if kflag == 1: + val = ctypes.c_long(val).value + print(f"\t'{string(name_off)}' val={val}LL") + + elif kind == BTF_KIND_FWD: + print(f"[{idx}] {kind_name} '{name}' fwd_kind={FWD_KIND[kflag]}") + warn_nonzero(vlen, "vlen") + warn_nonzero(size, "size") + + elif kind in [BTF_KIND_TYPEDEF, BTF_KIND_TYPE_TAG]: + print(f"[{idx}] {kind_name} '{name}' type_id={size}") + warn_nonzero(kflag, "kflag") + warn_nonzero(kflag, "vlen") + + elif kind == BTF_KIND_FUNC: + linkage = FUNC_LINKAGE[vlen] + print(f"[{idx}] {kind_name} '{name}' type_id={size} " f"linkage={linkage}") + warn_nonzero(kflag, "kflag") + + elif kind == BTF_KIND_FUNC_PROTO: + print(f"[{idx}] {kind_name} '{name}' ret_type_id={size} " f"vlen={vlen}") + warn_nonzero(name_off, "name_off") + warn_nonzero(kflag, "kflag") + for _ in range(0, vlen): + name_off, type = unpack("II") + print(f"\t'{string(name_off)}' type_id={type}") + + elif kind == BTF_KIND_VAR: + (linkage,) = unpack("I") + linkage = VAR_LINKAGE[linkage] + print(f"[{idx}] {kind_name} '{name}' type_id={size}, " f"linkage={linkage}") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind == BTF_KIND_DATASEC: + print(f"[{idx}] {kind_name} '{name}' size={size} vlen={vlen}") + 
+            warn_nonzero(kflag, "kflag")
+            warn_nonzero(size, "size")
+            for _ in range(0, vlen):
+                type, offset, size = unpack("III")
+                print(f"\ttype_id={type} offset={offset} size={size}")
+
+        elif kind == BTF_KIND_FLOAT:
+            print(f"[{idx}] {kind_name} '{name}' size={size}")
+            warn_nonzero(kflag, "kflag")
+            warn_nonzero(vlen, "vlen")
+
+        elif kind == BTF_KIND_DECL_TAG:
+            (component_idx,) = unpack("i")
+            print(
+                f"[{idx}] {kind_name} '{name}' type_id={size} "
+                + f"component_idx={component_idx}"
+            )
+            warn_nonzero(kflag, "kflag")
+            warn_nonzero(vlen, "vlen")
+
+        else:
+            warn(
+                f"<{idx}> Unexpected entry: kind={kind_name} "
+                f"name_off={name_off} "
+                f"vlen={vlen} kflag={kflag} size={size}"
+            )
+
+        idx += 1
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        warn(f"Usage: {sys.argv[0]} <btf_file>")
+        sys.exit(1)
+    print_btf(sys.argv[1])
diff --git a/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll b/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll
new file mode 100644
index 0000000000000..31081586bf7af
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=bpfel -mcpu=v1 -filetype=asm < %s | FileCheck %s
+;
+; Source:
+;   $ cat atomics_mem_order_v1.c
+;   #include <stdatomic.h>
+;
+;   void test_fetch_add_32_noret(int _Atomic *i) {
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+;   }
+;
+;   void test_fetch_add_64_noret(long _Atomic *i) {
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+;   }
+;
+;   void test_fetch_sub_64_noret(long _Atomic *i) {
+;     (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed);
+;     (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire);
+;     (void)__c11_atomic_fetch_sub(i, 10, memory_order_release);
+;     (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel);
+;     (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+;   }
+;
+;   long test_fetch_sub_64_ret(long _Atomic *i) {
+;     return __c11_atomic_fetch_sub(i, 10, memory_order_acquire) +
+;            __c11_atomic_fetch_sub(i, 10, memory_order_release) +
+;            __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) +
+;            __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+;   }
+;
+;   void test_fetch_and_64_noret(long _Atomic *i) {
+;     (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
+;     (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire);
+;     (void)__c11_atomic_fetch_and(i, 10, memory_order_release);
+;     (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel);
+;     (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+;   }
+;
+;   long test_fetch_and_64_ret(long _Atomic *i) {
+;     return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) +
+;            __c11_atomic_fetch_and(i, 10, memory_order_acquire) +
+;            __c11_atomic_fetch_and(i, 10, memory_order_release) +
+;            __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) +
+;            __c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+;   }
+;
+;   void test_fetch_or_64_noret(long _Atomic *i) {
+;     (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed);
+;
(void)__c11_atomic_fetch_or(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_or_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_or(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_or(i, 10, memory_order_release) + +; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_xor_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_xor_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_xor(i, 10, memory_order_release) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_32_noret: +; CHECK: .Ltest_fetch_add_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw add ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw add ptr %i, i32 10 release, align 4 + %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_64_noret: +; CHECK: .Ltest_fetch_add_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw add ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw add ptr %i, i64 10 release, align 8 + %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_sub_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { 
+; CHECK-LABEL: test_fetch_sub_64_noret: +; CHECK: .Ltest_fetch_sub_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw sub ptr %i, i64 10 release, align 8 + %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_64_ret: +; CHECK: .Ltest_fetch_sub_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %1 = atomicrmw sub ptr %i, i64 10 release, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + %add8 = add nsw i64 %add5, %3 + ret i64 %add8 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_and_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_noret: +; CHECK: .Ltest_fetch_and_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) &= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_and_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_ret: +; CHECK: .Ltest_fetch_and_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = 
atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_or_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_noret: +; CHECK: .Ltest_fetch_or_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) |= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_or_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_ret: +; CHECK: .Ltest_fetch_or_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_or((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_xor_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_64_noret: +; CHECK: .Ltest_fetch_xor_64_noret$local: +; CHECK-NEXT: .type 
.Ltest_fetch_xor_64_noret$local,@function
+; CHECK-NEXT:    # %bb.0: # %entry
+; CHECK-NEXT:    r2 = 10
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    lock *(u64 *)(r1 + 0) ^= r3
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT:    exit
+entry:
+  %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+  %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+  %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+  %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+  %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+  ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_xor_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_64_ret:
+; CHECK:       .Ltest_fetch_xor_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_xor_64_ret$local,@function
+; CHECK-NEXT:    # %bb.0: # %entry
+; CHECK-NEXT:    r2 = 10
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r0 = 10
+; CHECK-NEXT:    r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0)
+; CHECK-NEXT:    r0 += r3
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r0 += r3
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r0 += r3
+; CHECK-NEXT:    r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT:    r0 += r2
+; CHECK-NEXT:    exit
+entry:
+  %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+  %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+  %add = add nsw i64 %1, %0
+  %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+  %add5 = add nsw i64 %add, %2
+  %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+  %add8 = add nsw i64 %add5, %3
+  %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+  %add11 = add nsw i64 %add8, %4
+  ret i64 %add11
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"}
diff --git a/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll b/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll
new file mode 100644
index 0000000000000..20b9ebcb0d473
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll
@@ -0,0 +1,781 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=bpfel -mcpu=v3 -filetype=asm < %s | FileCheck %s
+;
+; Source:
+;   $ cat atomics_mem_order_v3.c
+;   #include <stdatomic.h>
+;
+;   void test_fetch_add_32_noret(int _Atomic *i) {
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+;     (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+;   }
+;
+;   int test_fetch_add_32_ret(int _Atomic *i) {
+;     return __c11_atomic_fetch_add(i, 10, memory_order_relaxed) +
+;            __c11_atomic_fetch_add(i, 10, memory_order_acquire) +
+;
__c11_atomic_fetch_add(i, 10, memory_order_release) + +; __c11_atomic_fetch_add(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_add_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_add_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_add(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_add(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_add(i, 10, memory_order_release) + +; __c11_atomic_fetch_add(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_sub_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_sub_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_sub(i, 10, memory_order_release) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_sub_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_sub_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_sub(i, 10, memory_order_release) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_and_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_and_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_and(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_and(i, 10, memory_order_release) + +; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_and_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_and_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_and(i, 10, 
memory_order_acquire) + +; __c11_atomic_fetch_and(i, 10, memory_order_release) + +; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_or_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_or_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_or(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_or(i, 10, memory_order_release) + +; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_or_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_or_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_or(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_or(i, 10, memory_order_release) + +; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_xor_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_xor_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_xor(i, 10, memory_order_release) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_xor_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_xor_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_xor(i, 10, memory_order_release) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_32_noret: +; CHECK: .Ltest_fetch_add_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w3 = 10 +; 
CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw add ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw add ptr %i, i32 10 release, align 4 + %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_add_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_32_ret: +; CHECK: .Ltest_fetch_add_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw add ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw add ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_64_noret: +; CHECK: .Ltest_fetch_add_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw add ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw add ptr %i, i64 10 release, align 8 + %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_add_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_64_ret: +; CHECK: .Ltest_fetch_add_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = 
atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw add ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw add ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_sub_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_32_noret: +; CHECK: .Ltest_fetch_sub_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w2 = -w2 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw sub ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw sub ptr %i, i32 10 release, align 4 + %3 = atomicrmw sub ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw sub ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_sub_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_32_ret: +; CHECK: .Ltest_fetch_sub_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w2 = -w2 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w0 = w2 +; CHECK-NEXT: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw sub ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw sub ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw sub ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw sub ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_sub_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_64_noret: +; CHECK: .Ltest_fetch_sub_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; 
CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw sub ptr %i, i64 10 release, align 8 + %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_64_ret: +; CHECK: .Ltest_fetch_sub_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw sub ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_and_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_32_noret: +; CHECK: .Ltest_fetch_and_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) &= w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_and((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw and ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw and ptr %i, i32 10 release, align 4 + %3 = atomicrmw and ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw and ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_and_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_32_ret: +; CHECK: .Ltest_fetch_and_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) &= w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = 
atomic_fetch_and((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_and((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw and ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw and ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw and ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw and ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_and_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_noret: +; CHECK: .Ltest_fetch_and_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) &= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_and_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_ret: +; CHECK: .Ltest_fetch_and_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_or_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_32_noret: +; CHECK: .Ltest_fetch_or_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; 
CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) |= w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_or((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw or ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw or ptr %i, i32 10 release, align 4 + %3 = atomicrmw or ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw or ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_or_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_32_ret: +; CHECK: .Ltest_fetch_or_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) |= w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_or((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_or((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw or ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw or ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw or ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw or ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_or_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_noret: +; CHECK: .Ltest_fetch_or_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) |= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_or_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_ret: +; CHECK: .Ltest_fetch_or_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_or((u64 
*)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_xor_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_32_noret: +; CHECK: .Ltest_fetch_xor_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) ^= w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw xor ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw xor ptr %i, i32 10 release, align 4 + %3 = atomicrmw xor ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw xor ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_xor_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_32_ret: +; CHECK: .Ltest_fetch_xor_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) ^= w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_xor((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw xor ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw xor ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw xor ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw xor ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_xor_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_64_noret: +; CHECK: .Ltest_fetch_xor_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; 
CHECK-NEXT:    lock *(u64 *)(r1 + 0) ^= r3
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT:    exit
+entry:
+  %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+  %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+  %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+  %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+  %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+  ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_xor_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_64_ret:
+; CHECK:       .Ltest_fetch_xor_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_xor_64_ret$local,@function
+; CHECK-NEXT:    # %bb.0: # %entry
+; CHECK-NEXT:    r2 = 10
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r0 = 10
+; CHECK-NEXT:    r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0)
+; CHECK-NEXT:    r0 += r3
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r0 += r3
+; CHECK-NEXT:    r3 = 10
+; CHECK-NEXT:    r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT:    r0 += r3
+; CHECK-NEXT:    r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT:    r0 += r2
+; CHECK-NEXT:    exit
+entry:
+  %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+  %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+  %add = add nsw i64 %1, %0
+  %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+  %add5 = add nsw i64 %add, %2
+  %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+  %add8 = add nsw i64 %add5, %3
+  %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+  %add11 = add nsw i64 %add8, %4
+  ret i64 %add11
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"}
diff --git a/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll b/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll
new file mode 100644
index 0000000000000..4d630d475b296
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll
@@ -0,0 +1,27 @@
+; RUN: not llc -march=bpfel -mcpu=v1 -filetype=asm < %s
+;
+; Source:
+;   $ cat atomics_sub64_relaxed_v1.c
+;   #include <stdatomic.h>
+;
+;   long test_fetch_sub_64_ret(long _Atomic *i) {
+;     return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed);
+;   }
+
+target triple = "bpf"
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8
+  ret i64 %0
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 =
!{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"} diff --git a/llvm/test/CodeGen/BPF/xaddd_v1.ll b/llvm/test/CodeGen/BPF/xaddd_v1.ll new file mode 100644 index 0000000000000..d3bfd8d81b15b --- /dev/null +++ b/llvm/test/CodeGen/BPF/xaddd_v1.ll @@ -0,0 +1,25 @@ +; RUN: not llc -march=bpfel -mcpu=v1 -filetype=asm < %s +; +; Source: +; $ cat xaddd_v1.c +; long test_fetch_add_64_ret(long *i) { +; return __sync_fetch_and_add(i, 10); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_add_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +entry: + %0 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + ret i64 %0 +} + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"} diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll new file mode 100644 index 0000000000000..9d86f87f3ed50 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; Make sure correct dxil expansions for atan2 are generated for float and half. 
+ +define noundef float @atan2_float(float noundef %y, float noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv float %y, %x +; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]] +; CHECK: ret float [[SELECT_HPI]] + %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x) + ret float %elt.atan2 +} + +define noundef half @atan2_half(half noundef %y, half noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv half %y, %x +; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]] +; CHECK: ret half [[SELECT_HPI]] + %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x) + ret half %elt.atan2 +} + +define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) { +entry: +; Just Expansion, no scalarization or lowering: +; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x +; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]]) +; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000> +; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000> +; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer +; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]] +; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]] +; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> <float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000>, <4 x float> [[SELECT_SUB_PI]] +; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000>, <4 x float> [[SELECT_NEGHPI]] +; EXPCHECK: ret <4 x float> [[SELECT_HPI]] + +; Scalarization occurs after expansion, so atan scalarization is tested separately. +; Expansion, scalarization and lowering: +; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls. +; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}}) +; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17, + + %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x) + ret <4 x float> %elt.atan2 +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll new file mode 100644 index 0000000000000..5b3077f85f5d4 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2_error.ll @@ -0,0 +1,11 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation atan does not support double overload type +; CHECK: in function atan2_double +; CHECK-SAME: Cannot create ATan operation: Invalid overload type + +define noundef double @atan2_double(double noundef %a, double noundef %b) #0 { +entry: + %1 = call double @llvm.atan2.f64(double %a, double %b) + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index 1af2b38d79943..9ef74e4960ce7 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -88,6 +88,50 @@ define ptr @load_acquire_ptr(ptr %ptr) { ret ptr %val } +define float @load_acquire_float(ptr %ptr) { +; LA32-LABEL: load_acquire_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: dbar 20 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr acquire, align 8 + ret float %val +} + +define double @load_acquire_double(ptr %ptr) { +; LA32-LABEL: load_acquire_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a1, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: 
movgr2fr.d $fa0, $a0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr acquire, align 8 + ret double %val +} + define i8 @load_unordered_i8(ptr %ptr) { ; LA32-LABEL: load_unordered_i8: ; LA32: # %bb.0: @@ -165,6 +209,47 @@ define ptr @load_unordered_ptr(ptr %ptr) { ret ptr %val } +define float @load_unordered_float(ptr %ptr) { +; LA32-LABEL: load_unordered_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr unordered, align 8 + ret float %val +} + +define double @load_unordered_double(ptr %ptr) { +; LA32-LABEL: load_unordered_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr unordered, align 8 + ret double %val +} + define i8 @load_monotonic_i8(ptr %ptr) { ; LA32-LABEL: load_monotonic_i8: ; LA32: # %bb.0: @@ -242,6 +327,47 @@ define ptr @load_monotonic_ptr(ptr %ptr) { ret ptr %val } +define float @load_monotonic_float(ptr %ptr) { +; LA32-LABEL: load_monotonic_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr monotonic, align 8 + ret float %val +} + +define double @load_monotonic_double(ptr %ptr) { +; LA32-LABEL: load_monotonic_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr monotonic, align 8 + ret double %val +} + define i8 @load_seq_cst_i8(ptr %ptr) { ; LA32-LABEL: load_seq_cst_i8: ; LA32: # %bb.0: @@ -328,6 +454,50 @@ define ptr @load_seq_cst_ptr(ptr %ptr) { ret ptr %val } +define float @load_seq_cst_float(ptr %ptr) { +; LA32-LABEL: load_seq_cst_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr seq_cst, align 8 + ret float %val +} + +define double @load_seq_cst_double(ptr %ptr) { +; LA32-LABEL: load_seq_cst_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 
+; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a1, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr seq_cst, align 8 + ret double %val +} + define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: # %bb.0: @@ -411,6 +581,48 @@ define void @store_release_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_release_float(ptr %ptr, float %v) { +; LA32-LABEL: store_release_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: dbar 18 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr release, align 8 + ret void +} + +define void @store_release_double(ptr %ptr, double %v) { +; LA32-LABEL: store_release_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr release, align 8 + ret void +} + define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_unordered_i8: ; LA32: # %bb.0: @@ -488,6 +700,47 @@ define void @store_unordered_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_unordered_float(ptr %ptr, float %v) { +; LA32-LABEL: store_unordered_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr unordered, align 8 + ret void +} + +define void @store_unordered_double(ptr %ptr, double %v) { +; LA32-LABEL: store_unordered_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr unordered, align 8 + ret void +} + define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_monotonic_i8: ; LA32: # %bb.0: @@ -565,6 +818,47 @@ define void 
@store_monotonic_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_monotonic_float(ptr %ptr, float %v) { +; LA32-LABEL: store_monotonic_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr monotonic, align 8 + ret void +} + +define void @store_monotonic_double(ptr %ptr, double %v) { +; LA32-LABEL: store_monotonic_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr monotonic, align 8 + ret void +} + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_seq_cst_i8: ; LA32: # %bb.0: @@ -653,3 +947,46 @@ define void @store_seq_cst_ptr(ptr %ptr, ptr %v) { store atomic ptr %v, ptr %ptr seq_cst, align 8 ret void } + +define void @store_seq_cst_float(ptr %ptr, float %v) { +; LA32-LABEL: store_seq_cst_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr seq_cst, align 8 + ret void +} + +define void @store_seq_cst_double(ptr %ptr, double %v) { +; LA32-LABEL: store_seq_cst_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir new file mode 100644 index 0000000000000..362d54db7033f --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir @@ -0,0 +1,14 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestIsSSAOverrideConflict has explicit property IsSSA, but is not valid SSA +name: TestIsSSAOverrideConflict +isSSA: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF + %0:_(s32) = G_IMPLICIT_DEF +... 
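# For contrast, a body in valid SSA form (every virtual register defined
# exactly once) would satisfy the explicit `isSSA: true` override without a
# diagnostic. A minimal sketch, kept as a comment since it is hypothetical
# and not part of this patch:
#
#   name: TestIsSSAOk
#   isSSA: true
#   body: |
#     bb.0:
#       %0:_(s32) = G_IMPLICIT_DEF
#       %1:_(s32) = G_ADD %0, %0
#   ...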
diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir new file mode 100644 index 0000000000000..c113ea59a9049 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir @@ -0,0 +1,18 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestNoPhisOverrideConflict has explicit property NoPhi, but contains at least one PHI +name: TestNoPhisOverrideConflict +noPhis: true +tracksRegLiveness: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF + + bb.1: + %1:_(s32) = PHI %0, %bb.0, %1, %bb.1 + G_BR %bb.1 +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir new file mode 100644 index 0000000000000..5f394a4bbbdb6 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir @@ -0,0 +1,13 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestNoVRegsOverrideConflict has explicit property NoVRegs, but contains virtual registers +name: TestNoVRegsOverrideConflict +noVRegs: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir deleted file mode 100644 index d8d178d90ae0a..0000000000000 --- a/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir +++ /dev/null @@ -1,35 +0,0 @@ -# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s - -# Test that computed properties are not conflicting with explicitly set -# properties - ---- -# CHECK: error: {{.*}}: TestNoPhisOverrideConflict has explicit property NoPhi, but contains at least one PHI -name: TestNoPhisOverrideConflict -noPhis: true -tracksRegLiveness: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF - - bb.1: - %1:_(s32) = PHI %0, %bb.0, %1, %bb.1 - G_BR %bb.1 -... ---- -# CHECK: error: {{.*}}: TestIsSSAOverrideConflict has explicit property IsSSA, but is not valid SSA -name: TestIsSSAOverrideConflict -isSSA: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF - %0:_(s32) = G_IMPLICIT_DEF -... ---- -# CHECK: error: {{.*}}: TestNoVRegsOverrideConflict has explicit property NoVRegs, but contains virtual registers -name: TestNoVRegsOverrideConflict -noVRegs: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF -... 
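The NVPTX rotate tests below check inline PTX that combines the two shifted
halves with add rather than or; the two are equivalent here because the halves
occupy disjoint bit ranges. At the IR level, a rotate is simply a funnel shift
with both data operands equal; a minimal reference sketch (hypothetical, not
one of the tests in this patch):

; Rotate-left of %x by %n, expressed with the fshl intrinsic: fshl
; concatenates its first two operands and shifts left, so with both
; operands equal to %x the shifted-out bits wrap around.
define i64 @rotl64_ref(i64 %x, i64 %n) {
  %r = tail call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %n)
  ret i64 %r
}
declare i64 @llvm.fshl.i64(i64, i64, i64)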
diff --git a/llvm/test/CodeGen/NVPTX/rotate.ll b/llvm/test/CodeGen/NVPTX/rotate.ll index 9ec5bcd13403b..20c7ae5908d29 100644 --- a/llvm/test/CodeGen/NVPTX/rotate.ll +++ b/llvm/test/CodeGen/NVPTX/rotate.ll @@ -9,29 +9,26 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) -declare i64 @llvm.fshl.i64(i64, i64, i64) -declare i64 @llvm.fshr.i64(i64, i64, i64) -declare i32 @llvm.fshl.i32(i32, i32, i32) -declare i32 @llvm.fshr.i32(i32, i32, i32) - - ; SM20: rotate32 ; SM35: rotate32 define i32 @rotate32(i32 %a, i32 %b) { ; SM20-LABEL: rotate32( ; SM20: { -; SM20-NEXT: .reg .b32 %r<9>; +; SM20-NEXT: .reg .b32 %r<4>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u32 %r1, [rotate32_param_0]; ; SM20-NEXT: ld.param.u32 %r2, [rotate32_param_1]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: shl.b32 %r4, %r1, %r3; -; SM20-NEXT: neg.s32 %r5, %r2; -; SM20-NEXT: and.b32 %r6, %r5, 31; -; SM20-NEXT: shr.u32 %r7, %r1, %r6; -; SM20-NEXT: or.b32 %r8, %r4, %r7; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r8; +; SM20-NEXT: { +; SM20-NEXT: .reg .b32 %lhs; +; SM20-NEXT: .reg .b32 %rhs; +; SM20-NEXT: .reg .b32 %amt2; +; SM20-NEXT: shl.b32 %lhs, %r1, %r2; +; SM20-NEXT: sub.s32 %amt2, 32, %r2; +; SM20-NEXT: shr.b32 %rhs, %r1, %amt2; +; SM20-NEXT: add.u32 %r3, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b32 [func_retval0+0], %r3; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate32( @@ -53,36 +50,45 @@ define i32 @rotate32(i32 %a, i32 %b) { define i64 @rotate64(i64 %a, i32 %b) { ; SM20-LABEL: rotate64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotate64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<6>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [rotate64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%dummy,%r1}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%r2,%dummy}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: ld.param.u32 %r3, [rotate64_param_1]; +; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3; +; SM35-NEXT: shf.l.wrap.b32 %r5, %r1, %r2, %r3; +; SM35-NEXT: mov.b64 %rd2, {%r5, %r4}; +; SM35-NEXT: st.param.b64 
[func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b) ret i64 %val @@ -93,36 +99,45 @@ define i64 @rotate64(i64 %a, i32 %b) { define i64 @rotateright64(i64 %a, i32 %b) { ; SM20-LABEL: rotateright64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotateright64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotateright64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<6>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [rotateright64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%r1,%dummy}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%dummy,%r2}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: ld.param.u32 %r3, [rotateright64_param_1]; +; SM35-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3; +; SM35-NEXT: shf.r.wrap.b32 %r5, %r1, %r2, %r3; +; SM35-NEXT: mov.b64 %rd2, {%r5, %r4}; +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b) ret i64 %val @@ -133,14 +148,18 @@ define i64 @rotateright64(i64 %a, i32 %b) { define i32 @rotl0(i32 %x) { ; SM20-LABEL: rotl0( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; +; SM20-NEXT: .reg .b32 %r<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u32 %r1, [rotl0_param_0]; -; SM20-NEXT: shr.u32 %r2, %r1, 24; -; SM20-NEXT: shl.b32 %r3, %r1, 8; -; SM20-NEXT: or.b32 %r4, %r3, %r2; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b32 %lhs; +; SM20-NEXT: .reg .b32 %rhs; +; SM20-NEXT: shl.b32 %lhs, %r1, 8; +; SM20-NEXT: shr.b32 %rhs, %r1, 24; +; SM20-NEXT: add.u32 %r2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b32 [func_retval0+0], %r2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl0( @@ -158,40 +177,51 @@ define i32 @rotl0(i32 %x) { ret i32 %t2 } +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare i64 @llvm.fshr.i64(i64, i64, i64) + ; SM35: rotl64 define i64 @rotl64(i64 %a, i64 %n) { ; SM20-LABEL: rotl64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, 
[rotl64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<2>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_param_0]; ; SM35-NEXT: ld.param.u32 %r1, [rotl64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: .reg .u32 %amt2; +; SM35-NEXT: and.b32 %amt2, %r1, 63; +; SM35-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM35-NEXT: sub.u32 %amt2, 64, %amt2; +; SM35-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n) ret i64 %val @@ -201,26 +231,34 @@ define i64 @rotl64(i64 %a, i64 %n) { define i64 @rotl64_imm(i64 %a) { ; SM20-LABEL: rotl64_imm( ; SM20: { -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0]; -; SM20-NEXT: shr.u64 %rd2, %rd1, 62; -; SM20-NEXT: shl.b64 %rd3, %rd1, 2; -; SM20-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: shl.b64 %lhs, %rd1, 2; +; SM20-NEXT: shr.b64 %rhs, %rd1, 62; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl64_imm( ; SM35: { -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0]; -; SM35-NEXT: shr.u64 %rd2, %rd1, 62; -; SM35-NEXT: shl.b64 %rd3, %rd1, 2; -; SM35-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: shl.b64 %lhs, %rd1, 2; +; SM35-NEXT: shr.b64 %rhs, %rd1, 62; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66) ret i64 %val @@ -230,36 +268,44 @@ define i64 @rotl64_imm(i64 %a) { define i64 @rotr64(i64 %a, i64 %n) { ; SM20-LABEL: rotr64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotr64_param_1]; -; SM20-NEXT: and.b32 
%r2, %r1, 63; -; SM20-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotr64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<2>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_param_0]; ; SM35-NEXT: ld.param.u32 %r1, [rotr64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: .reg .u32 %amt2; +; SM35-NEXT: and.b32 %amt2, %r1, 63; +; SM35-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM35-NEXT: sub.u32 %amt2, 64, %amt2; +; SM35-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n) ret i64 %val @@ -269,180 +315,35 @@ define i64 @rotr64(i64 %a, i64 %n) { define i64 @rotr64_imm(i64 %a) { ; SM20-LABEL: rotr64_imm( ; SM20: { -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0]; -; SM20-NEXT: shl.b64 %rd2, %rd1, 62; -; SM20-NEXT: shr.u64 %rd3, %rd1, 2; -; SM20-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: shl.b64 %lhs, %rd1, 62; +; SM20-NEXT: shr.b64 %rhs, %rd1, 2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotr64_imm( ; SM35: { -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0]; -; SM35-NEXT: shl.b64 %rd2, %rd1, 62; -; SM35-NEXT: shr.u64 %rd3, %rd1, 2; -; SM35-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: shl.b64 %lhs, %rd1, 62; +; SM35-NEXT: shr.b64 %rhs, %rd1, 2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66) ret i64 %val } - -define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) { -; SM20-LABEL: funnel_shift_right_32( -; SM20: { -; SM20-NEXT: .reg .b32 %r<11>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0]; -; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_2]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: ld.param.u32 %r4, [funnel_shift_right_32_param_1]; -; SM20-NEXT: shr.u32 %r5, %r4, 
%r3; -; SM20-NEXT: shl.b32 %r6, %r1, 1; -; SM20-NEXT: not.b32 %r7, %r2; -; SM20-NEXT: and.b32 %r8, %r7, 31; -; SM20-NEXT: shl.b32 %r9, %r6, %r8; -; SM20-NEXT: or.b32 %r10, %r9, %r5; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r10; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_right_32( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0]; -; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_1]; -; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_right_32_param_2]; -; SM35-NEXT: shf.r.wrap.b32 %r4, %r1, %r2, %r3; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r4; -; SM35-NEXT: ret; - %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) - ret i32 %val -} - -define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) { -; SM20-LABEL: funnel_shift_left_32( -; SM20: { -; SM20-NEXT: .reg .b32 %r<11>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0]; -; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_2]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: shl.b32 %r4, %r1, %r3; -; SM20-NEXT: ld.param.u32 %r5, [funnel_shift_left_32_param_1]; -; SM20-NEXT: shr.u32 %r6, %r5, 1; -; SM20-NEXT: not.b32 %r7, %r2; -; SM20-NEXT: and.b32 %r8, %r7, 31; -; SM20-NEXT: shr.u32 %r9, %r6, %r8; -; SM20-NEXT: or.b32 %r10, %r4, %r9; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r10; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_left_32( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0]; -; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_1]; -; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_left_32_param_2]; -; SM35-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, %r3; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r4; -; SM35-NEXT: ret; - %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) - ret i32 %val -} - -define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) { -; SM20-LABEL: funnel_shift_right_64( -; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<7>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0]; -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1]; -; SM20-NEXT: shr.u64 %rd3, %rd2, %r2; -; SM20-NEXT: shl.b64 %rd4, %rd1, 1; -; SM20-NEXT: not.b32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd5, %rd4, %r4; -; SM20-NEXT: or.b64 %rd6, %rd5, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_right_64( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<7>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1]; -; SM35-NEXT: shr.u64 %rd3, %rd2, %r2; -; SM35-NEXT: shl.b64 %rd4, %rd1, 1; -; SM35-NEXT: not.b32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd5, %rd4, %r4; -; SM35-NEXT: or.b64 %rd6, %rd5, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM35-NEXT: ret; - %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) - ret i64 %val -} - -define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) { -; SM20-LABEL: funnel_shift_left_64( 
-; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<7>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0]; -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1]; -; SM20-NEXT: shr.u64 %rd4, %rd3, 1; -; SM20-NEXT: not.b32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd5, %rd4, %r4; -; SM20-NEXT: or.b64 %rd6, %rd2, %rd5; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_left_64( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<7>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1]; -; SM35-NEXT: shr.u64 %rd4, %rd3, 1; -; SM35-NEXT: not.b32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd5, %rd4, %r4; -; SM35-NEXT: or.b64 %rd6, %rd2, %rd5; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM35-NEXT: ret; - %val = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) - ret i64 %val -} - diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll index 05fdb02ac7479..64659ce1b5c56 100644 --- a/llvm/test/CodeGen/NVPTX/rotate_64.ll +++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll @@ -1,38 +1,25 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) +; CHECK: rotate64 define i64 @rotate64(i64 %a, i32 %b) { -; CHECK-LABEL: rotate64( -; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; -; CHECK-NEXT: shr.u64 %rd2, %rd1, 61; -; CHECK-NEXT: shl.b64 %rd3, %rd1, 3; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4; -; CHECK-NEXT: ret; +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 3; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 61; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3) ret i64 %val } +; CHECK: rotateright64 define i64 @rotateright64(i64 %a, i32 %b) { -; CHECK-LABEL: rotateright64( -; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; -; CHECK-NEXT: shl.b64 %rd2, %rd1, 61; -; CHECK-NEXT: shr.u64 %rd3, %rd1, 3; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4; -; CHECK-NEXT: ret; +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 61; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 3; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 3) ret i64 %val } diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll index 011497c4e2340..f9118900cb737 100644 --- a/llvm/test/CodeGen/NVPTX/unreachable.ll +++ b/llvm/test/CodeGen/NVPTX/unreachable.ll @@ -1,18 +1,23 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs \ +; RUN: llc < 
%s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \ ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs \ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \ ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \ ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \ ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; CHECK: .extern .func throw declare void @throw() #0 +declare void @llvm.trap() #0 -; CHECK: .entry kernel_func +; CHECK-LABEL: .entry kernel_func define void @kernel_func() { ; CHECK: call.uni ; CHECK: throw, @@ -24,6 +29,17 @@ define void @kernel_func() { unreachable } +; CHECK-LABEL: kernel_func_2 +define void @kernel_func_2() { +; CHECK: trap; exit; + call void @llvm.trap() + +;; Make sure we avoid emitting two trap instructions. 
+; CHECK-NOT: trap; +; CHECK-NOT: exit; + unreachable +} + attributes #0 = { noreturn } diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d920365..e531516c37e87 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexp ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30b..ffc826cc86de5 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v3 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: xscvdpspn v29, f1 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: li r3, 12 -; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: vextuwrx r4, r3, v3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 -; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: li r3, 12 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: xscvspdpn f1, v30 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxmrghd vs0, v29, vs1 +; CHECK-NEXT: xxmrghd vs0, vs1, v29 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 ; CHECK-NEXT: xvcvdpsp v28, vs0 ; CHECK-NEXT: xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; 
CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: mfvsrwz r3, v31 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: mfvsrwz r4, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop @@ -156,7 +162,7 @@ define half @ldexp_f16(half %arg0, i32 %arg1) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 diff --git a/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll new file mode 100644 index 0000000000000..010ee6ef043e7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; Test that a negative parameter smaller than 64 bits (e.g., int) +; is correctly implemented with sign-extension when passed to +; a floating point libcall. + +define double @ldexp_test(ptr %a, ptr %b) nounwind { +; CHECK-LABEL: ldexp_test: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfd 1, 0(3) +; CHECK-NEXT: lwa 4, 0(4) +; CHECK-NEXT: bl ldexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %base = load double, ptr %a + %exp = load i32, ptr %b + %call = call double @llvm.ldexp.f64.i32(double %base, i32 signext %exp) + ret double %call +} diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll index e70ba93de75e0..234a956be809e 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=NOZACAS,RV32IA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=NOZACAS,RV64IA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV64IA-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV64IA-ZABHA %s ; Test cmpxchg followed by a branch on the cmpxchg success value to see if the diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index acd6e8f9afe2a..9908503adb9c3 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -3,25 +3,25 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s -; RUN: llc 
-mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO-ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO-ZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-WMO-ZABHA %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-TSO-ZABHA %s define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 03157e13bff78..f50744fc3c1f3 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -12,22 +12,22 @@ ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-TSO,RV64IA-TSO-NOZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO,RV32IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO,RV32IA-TSO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO,RV64IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO,RV64IA-TSO-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -verify-machineinstrs < %s \ ; RUN: | 
FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-NOZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-NOZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-ZACAS %s define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index c143be478948e..ed0a160d3f58a 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -3,13 +3,13 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index a9c8a4be7d2b4..b5e892c0ff6ac 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -3,13 +3,13 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IA %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 1d4a634c89a22..86ce368bc1db6 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -121,7 +121,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZFBFMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFMIN 
%s ; RUN: llc -mtriple=riscv32 -mattr=+zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFWMA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV32ZALASR %s ; RUN: llc -mtriple=riscv32 -mattr=+zama16b %s -o - | FileCheck --check-prefixes=CHECK,RV32ZAMA16B %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV32ZICFILP %s @@ -264,7 +264,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFWMA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas %s -o - | FileCheck --check-prefix=RV64ZACAS %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas %s -o - | FileCheck --check-prefix=RV64ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV64ZALASR %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV64ZICFILP %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV64ZABHA %s diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 8e2fdfc4ba94c..ca40ba0399973 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -246,32 +246,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-LABEL: caller_half_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -112 -; ZHINX32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 100(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 96(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 88(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 84(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s6, 80(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 124(sp) -; ZHINX32-NEXT: sw t0, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 120(sp) -; ZHINX32-NEXT: sw t0, 52(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 116(sp) -; ZHINX32-NEXT: sw t0, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: addi sp, sp, -96 +; ZHINX32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 48(sp) # 4-byte 
Folded Spill +; ZHINX32-NEXT: sw s11, 44(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 112(sp) -; ZHINX32-NEXT: sw t0, 44(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t6, 128(sp) -; ZHINX32-NEXT: lh t5, 132(sp) -; ZHINX32-NEXT: lh t4, 136(sp) -; ZHINX32-NEXT: lh s0, 140(sp) +; ZHINX32-NEXT: lh t1, 116(sp) +; ZHINX32-NEXT: lh t2, 120(sp) +; ZHINX32-NEXT: lh s0, 124(sp) +; ZHINX32-NEXT: lh t3, 128(sp) +; ZHINX32-NEXT: lh t4, 132(sp) +; ZHINX32-NEXT: lh t5, 136(sp) +; ZHINX32-NEXT: lh t6, 140(sp) ; ZHINX32-NEXT: lh s1, 144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) ; ZHINX32-NEXT: lh s3, 152(sp) @@ -284,79 +280,71 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: lh t3, 192(sp) -; ZHINX32-NEXT: lh t2, 196(sp) -; ZHINX32-NEXT: lh t1, 200(sp) -; ZHINX32-NEXT: lh t0, 204(sp) -; ZHINX32-NEXT: sh t0, 38(sp) -; ZHINX32-NEXT: sh t1, 36(sp) -; ZHINX32-NEXT: sh t2, 34(sp) -; ZHINX32-NEXT: sh t3, 32(sp) -; ZHINX32-NEXT: sh ra, 30(sp) -; ZHINX32-NEXT: sh s11, 28(sp) -; ZHINX32-NEXT: sh s10, 26(sp) -; ZHINX32-NEXT: sh s9, 24(sp) -; ZHINX32-NEXT: sh s8, 22(sp) -; ZHINX32-NEXT: sh s7, 20(sp) -; ZHINX32-NEXT: sh s6, 18(sp) -; ZHINX32-NEXT: sh s5, 16(sp) -; ZHINX32-NEXT: sh s4, 14(sp) -; ZHINX32-NEXT: sh s3, 12(sp) -; ZHINX32-NEXT: sh s2, 10(sp) -; ZHINX32-NEXT: sh s1, 8(sp) +; ZHINX32-NEXT: sh ra, 38(sp) +; ZHINX32-NEXT: sh s11, 36(sp) +; ZHINX32-NEXT: sh s10, 34(sp) +; ZHINX32-NEXT: sh s9, 32(sp) +; ZHINX32-NEXT: sh s8, 30(sp) +; ZHINX32-NEXT: sh s7, 28(sp) +; ZHINX32-NEXT: sh s6, 26(sp) +; ZHINX32-NEXT: sh s5, 24(sp) +; ZHINX32-NEXT: sh s4, 22(sp) +; ZHINX32-NEXT: sh s3, 20(sp) +; ZHINX32-NEXT: sh s2, 18(sp) +; ZHINX32-NEXT: sh s1, 16(sp) +; ZHINX32-NEXT: sh t6, 14(sp) +; ZHINX32-NEXT: sh t5, 12(sp) +; ZHINX32-NEXT: sh t4, 10(sp) +; ZHINX32-NEXT: sh t3, 8(sp) +; ZHINX32-NEXT: lh t3, 96(sp) +; ZHINX32-NEXT: lh t4, 100(sp) +; ZHINX32-NEXT: lh t5, 104(sp) +; ZHINX32-NEXT: lh t6, 108(sp) ; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t4, 4(sp) -; ZHINX32-NEXT: sh t5, 2(sp) -; ZHINX32-NEXT: sh t6, 0(sp) -; ZHINX32-NEXT: lw t3, 44(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t5, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t6, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: sh t2, 4(sp) +; ZHINX32-NEXT: sh t1, 2(sp) +; ZHINX32-NEXT: sh t0, 0(sp) ; ZHINX32-NEXT: call callee_half_32 -; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 100(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 96(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 92(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s4, 88(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 84(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 80(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 68(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 64(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 112 +; ZHINX32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload +; 
ZHINX32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 52(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s10, 48(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 96 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_half_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -176 -; ZHINX64-NEXT: sd ra, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 80(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 200(sp) -; ZHINX64-NEXT: sd t0, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 192(sp) -; ZHINX64-NEXT: sd t0, 56(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 184(sp) -; ZHINX64-NEXT: sd t0, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -144 +; ZHINX64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lh t0, 176(sp) -; ZHINX64-NEXT: sd t0, 40(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t6, 208(sp) -; ZHINX64-NEXT: lh t5, 216(sp) -; ZHINX64-NEXT: lh t4, 224(sp) -; ZHINX64-NEXT: lh s0, 232(sp) +; ZHINX64-NEXT: lh t1, 184(sp) +; ZHINX64-NEXT: lh t2, 192(sp) +; ZHINX64-NEXT: lh s0, 200(sp) +; ZHINX64-NEXT: lh t3, 208(sp) +; ZHINX64-NEXT: lh t4, 216(sp) +; ZHINX64-NEXT: lh t5, 224(sp) +; ZHINX64-NEXT: lh t6, 232(sp) ; ZHINX64-NEXT: lh s1, 240(sp) ; ZHINX64-NEXT: lh s2, 248(sp) ; ZHINX64-NEXT: lh s3, 256(sp) @@ -369,49 +357,45 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh s10, 312(sp) ; ZHINX64-NEXT: lh s11, 320(sp) ; ZHINX64-NEXT: lh ra, 328(sp) -; ZHINX64-NEXT: lh t3, 336(sp) -; ZHINX64-NEXT: lh t2, 344(sp) -; ZHINX64-NEXT: lh t1, 352(sp) -; ZHINX64-NEXT: lh t0, 360(sp) -; ZHINX64-NEXT: sh t0, 38(sp) -; ZHINX64-NEXT: sh t1, 36(sp) -; ZHINX64-NEXT: sh t2, 34(sp) -; ZHINX64-NEXT: sh t3, 32(sp) -; ZHINX64-NEXT: sh ra, 30(sp) -; ZHINX64-NEXT: sh s11, 28(sp) -; ZHINX64-NEXT: sh s10, 26(sp) -; ZHINX64-NEXT: sh s9, 24(sp) -; ZHINX64-NEXT: sh s8, 22(sp) -; ZHINX64-NEXT: sh s7, 20(sp) -; ZHINX64-NEXT: sh s6, 18(sp) -; ZHINX64-NEXT: sh s5, 16(sp) -; ZHINX64-NEXT: sh s4, 14(sp) -; ZHINX64-NEXT: sh s3, 12(sp) -; ZHINX64-NEXT: sh s2, 10(sp) -; ZHINX64-NEXT: sh s1, 8(sp) +; ZHINX64-NEXT: sh 
ra, 38(sp) +; ZHINX64-NEXT: sh s11, 36(sp) +; ZHINX64-NEXT: sh s10, 34(sp) +; ZHINX64-NEXT: sh s9, 32(sp) +; ZHINX64-NEXT: sh s8, 30(sp) +; ZHINX64-NEXT: sh s7, 28(sp) +; ZHINX64-NEXT: sh s6, 26(sp) +; ZHINX64-NEXT: sh s5, 24(sp) +; ZHINX64-NEXT: sh s4, 22(sp) +; ZHINX64-NEXT: sh s3, 20(sp) +; ZHINX64-NEXT: sh s2, 18(sp) +; ZHINX64-NEXT: sh s1, 16(sp) +; ZHINX64-NEXT: sh t6, 14(sp) +; ZHINX64-NEXT: sh t5, 12(sp) +; ZHINX64-NEXT: sh t4, 10(sp) +; ZHINX64-NEXT: sh t3, 8(sp) +; ZHINX64-NEXT: lh t3, 144(sp) +; ZHINX64-NEXT: lh t4, 152(sp) +; ZHINX64-NEXT: lh t5, 160(sp) +; ZHINX64-NEXT: lh t6, 168(sp) ; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t4, 4(sp) -; ZHINX64-NEXT: sh t5, 2(sp) -; ZHINX64-NEXT: sh t6, 0(sp) -; ZHINX64-NEXT: ld t3, 40(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sh t2, 4(sp) +; ZHINX64-NEXT: sh t1, 2(sp) +; ZHINX64-NEXT: sh t0, 0(sp) ; ZHINX64-NEXT: call callee_half_32 -; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 80(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 72(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 176 +; ZHINX64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 144 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_half_32: @@ -917,32 +901,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZHINX64-LABEL: caller_float_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -224 -; ZHINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 248(sp) -; ZHINX64-NEXT: sd t0, 112(sp) # 
8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 240(sp) -; ZHINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 232(sp) -; ZHINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -192 +; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lw t0, 224(sp) -; ZHINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t6, 256(sp) -; ZHINX64-NEXT: lw t5, 264(sp) -; ZHINX64-NEXT: lw t4, 272(sp) -; ZHINX64-NEXT: lw s0, 280(sp) +; ZHINX64-NEXT: lw t1, 232(sp) +; ZHINX64-NEXT: lw t2, 240(sp) +; ZHINX64-NEXT: lw s0, 248(sp) +; ZHINX64-NEXT: lw t3, 256(sp) +; ZHINX64-NEXT: lw t4, 264(sp) +; ZHINX64-NEXT: lw t5, 272(sp) +; ZHINX64-NEXT: lw t6, 280(sp) ; ZHINX64-NEXT: lw s1, 288(sp) ; ZHINX64-NEXT: lw s2, 296(sp) ; ZHINX64-NEXT: lw s3, 304(sp) @@ -955,49 +935,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw s10, 360(sp) ; ZHINX64-NEXT: lw s11, 368(sp) ; ZHINX64-NEXT: lw ra, 376(sp) -; ZHINX64-NEXT: lw t3, 384(sp) -; ZHINX64-NEXT: lw t2, 392(sp) -; ZHINX64-NEXT: lw t1, 400(sp) -; ZHINX64-NEXT: lw t0, 408(sp) -; ZHINX64-NEXT: sw t0, 76(sp) -; ZHINX64-NEXT: sw t1, 72(sp) -; ZHINX64-NEXT: sw t2, 68(sp) -; ZHINX64-NEXT: sw t3, 64(sp) -; ZHINX64-NEXT: sw ra, 60(sp) -; ZHINX64-NEXT: sw s11, 56(sp) -; ZHINX64-NEXT: sw s10, 52(sp) -; ZHINX64-NEXT: sw s9, 48(sp) -; ZHINX64-NEXT: sw s8, 44(sp) -; ZHINX64-NEXT: sw s7, 40(sp) -; ZHINX64-NEXT: sw s6, 36(sp) -; ZHINX64-NEXT: sw s5, 32(sp) -; ZHINX64-NEXT: sw s4, 28(sp) -; ZHINX64-NEXT: sw s3, 24(sp) -; ZHINX64-NEXT: sw s2, 20(sp) -; ZHINX64-NEXT: sw s1, 16(sp) +; ZHINX64-NEXT: sw ra, 76(sp) +; ZHINX64-NEXT: sw s11, 72(sp) +; ZHINX64-NEXT: sw s10, 68(sp) +; ZHINX64-NEXT: sw s9, 64(sp) +; ZHINX64-NEXT: sw s8, 60(sp) +; ZHINX64-NEXT: sw s7, 56(sp) +; ZHINX64-NEXT: sw s6, 52(sp) +; ZHINX64-NEXT: sw s5, 48(sp) +; ZHINX64-NEXT: sw s4, 44(sp) +; ZHINX64-NEXT: sw s3, 40(sp) +; ZHINX64-NEXT: sw s2, 36(sp) +; ZHINX64-NEXT: sw s1, 32(sp) +; ZHINX64-NEXT: sw t6, 28(sp) +; ZHINX64-NEXT: sw t5, 24(sp) +; ZHINX64-NEXT: sw t4, 20(sp) +; ZHINX64-NEXT: sw t3, 16(sp) +; ZHINX64-NEXT: lw t3, 192(sp) +; ZHINX64-NEXT: lw t4, 200(sp) +; ZHINX64-NEXT: lw t5, 208(sp) +; ZHINX64-NEXT: lw t6, 216(sp) ; ZHINX64-NEXT: sw s0, 12(sp) -; ZHINX64-NEXT: sw t4, 8(sp) -; ZHINX64-NEXT: sw t5, 4(sp) -; ZHINX64-NEXT: sw t6, 0(sp) -; ZHINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sw t2, 8(sp) +; ZHINX64-NEXT: sw t1, 4(sp) +; ZHINX64-NEXT: sw t0, 0(sp) ; ZHINX64-NEXT: call callee_float_32 -; ZHINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 192(sp) # 8-byte 
Folded Reload -; ZHINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 224 +; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 192 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_float_32: @@ -1087,32 +1063,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZFINX64-LABEL: caller_float_32: ; ZFINX64: # %bb.0: -; ZFINX64-NEXT: addi sp, sp, -224 -; ZFINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 248(sp) -; ZFINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 240(sp) -; ZFINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 232(sp) -; ZFINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: addi sp, sp, -192 +; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: lw t0, 224(sp) -; ZFINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t6, 256(sp) -; ZFINX64-NEXT: lw t5, 264(sp) -; ZFINX64-NEXT: lw t4, 272(sp) -; ZFINX64-NEXT: lw s0, 280(sp) +; ZFINX64-NEXT: lw t1, 232(sp) +; ZFINX64-NEXT: lw t2, 240(sp) +; ZFINX64-NEXT: lw s0, 248(sp) +; ZFINX64-NEXT: lw t3, 
256(sp) +; ZFINX64-NEXT: lw t4, 264(sp) +; ZFINX64-NEXT: lw t5, 272(sp) +; ZFINX64-NEXT: lw t6, 280(sp) ; ZFINX64-NEXT: lw s1, 288(sp) ; ZFINX64-NEXT: lw s2, 296(sp) ; ZFINX64-NEXT: lw s3, 304(sp) @@ -1125,49 +1097,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw s10, 360(sp) ; ZFINX64-NEXT: lw s11, 368(sp) ; ZFINX64-NEXT: lw ra, 376(sp) -; ZFINX64-NEXT: lw t3, 384(sp) -; ZFINX64-NEXT: lw t2, 392(sp) -; ZFINX64-NEXT: lw t1, 400(sp) -; ZFINX64-NEXT: lw t0, 408(sp) -; ZFINX64-NEXT: sw t0, 76(sp) -; ZFINX64-NEXT: sw t1, 72(sp) -; ZFINX64-NEXT: sw t2, 68(sp) -; ZFINX64-NEXT: sw t3, 64(sp) -; ZFINX64-NEXT: sw ra, 60(sp) -; ZFINX64-NEXT: sw s11, 56(sp) -; ZFINX64-NEXT: sw s10, 52(sp) -; ZFINX64-NEXT: sw s9, 48(sp) -; ZFINX64-NEXT: sw s8, 44(sp) -; ZFINX64-NEXT: sw s7, 40(sp) -; ZFINX64-NEXT: sw s6, 36(sp) -; ZFINX64-NEXT: sw s5, 32(sp) -; ZFINX64-NEXT: sw s4, 28(sp) -; ZFINX64-NEXT: sw s3, 24(sp) -; ZFINX64-NEXT: sw s2, 20(sp) -; ZFINX64-NEXT: sw s1, 16(sp) +; ZFINX64-NEXT: sw ra, 76(sp) +; ZFINX64-NEXT: sw s11, 72(sp) +; ZFINX64-NEXT: sw s10, 68(sp) +; ZFINX64-NEXT: sw s9, 64(sp) +; ZFINX64-NEXT: sw s8, 60(sp) +; ZFINX64-NEXT: sw s7, 56(sp) +; ZFINX64-NEXT: sw s6, 52(sp) +; ZFINX64-NEXT: sw s5, 48(sp) +; ZFINX64-NEXT: sw s4, 44(sp) +; ZFINX64-NEXT: sw s3, 40(sp) +; ZFINX64-NEXT: sw s2, 36(sp) +; ZFINX64-NEXT: sw s1, 32(sp) +; ZFINX64-NEXT: sw t6, 28(sp) +; ZFINX64-NEXT: sw t5, 24(sp) +; ZFINX64-NEXT: sw t4, 20(sp) +; ZFINX64-NEXT: sw t3, 16(sp) +; ZFINX64-NEXT: lw t3, 192(sp) +; ZFINX64-NEXT: lw t4, 200(sp) +; ZFINX64-NEXT: lw t5, 208(sp) +; ZFINX64-NEXT: lw t6, 216(sp) ; ZFINX64-NEXT: sw s0, 12(sp) -; ZFINX64-NEXT: sw t4, 8(sp) -; ZFINX64-NEXT: sw t5, 4(sp) -; ZFINX64-NEXT: sw t6, 0(sp) -; ZFINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: sw t2, 8(sp) +; ZFINX64-NEXT: sw t1, 4(sp) +; ZFINX64-NEXT: sw t0, 0(sp) ; ZFINX64-NEXT: call callee_float_32 -; ZFINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: addi sp, sp, 224 +; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded 
Reload +; ZFINX64-NEXT: addi sp, sp, 192 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: caller_float_32: @@ -1257,32 +1225,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZDINX64-LABEL: caller_float_32: ; ZDINX64: # %bb.0: -; ZDINX64-NEXT: addi sp, sp, -224 -; ZDINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 248(sp) -; ZDINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 240(sp) -; ZDINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 232(sp) -; ZDINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: addi sp, sp, -192 +; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: lw t0, 224(sp) -; ZDINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t6, 256(sp) -; ZDINX64-NEXT: lw t5, 264(sp) -; ZDINX64-NEXT: lw t4, 272(sp) -; ZDINX64-NEXT: lw s0, 280(sp) +; ZDINX64-NEXT: lw t1, 232(sp) +; ZDINX64-NEXT: lw t2, 240(sp) +; ZDINX64-NEXT: lw s0, 248(sp) +; ZDINX64-NEXT: lw t3, 256(sp) +; ZDINX64-NEXT: lw t4, 264(sp) +; ZDINX64-NEXT: lw t5, 272(sp) +; ZDINX64-NEXT: lw t6, 280(sp) ; ZDINX64-NEXT: lw s1, 288(sp) ; ZDINX64-NEXT: lw s2, 296(sp) ; ZDINX64-NEXT: lw s3, 304(sp) @@ -1295,49 +1259,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw s10, 360(sp) ; ZDINX64-NEXT: lw s11, 368(sp) ; ZDINX64-NEXT: lw ra, 376(sp) -; ZDINX64-NEXT: lw t3, 384(sp) -; ZDINX64-NEXT: lw t2, 392(sp) -; ZDINX64-NEXT: lw t1, 400(sp) -; ZDINX64-NEXT: lw t0, 408(sp) -; ZDINX64-NEXT: sw t0, 76(sp) -; ZDINX64-NEXT: sw t1, 72(sp) -; ZDINX64-NEXT: sw t2, 68(sp) -; ZDINX64-NEXT: sw t3, 64(sp) -; ZDINX64-NEXT: sw ra, 60(sp) -; ZDINX64-NEXT: sw s11, 56(sp) -; ZDINX64-NEXT: sw s10, 52(sp) -; ZDINX64-NEXT: sw s9, 48(sp) -; ZDINX64-NEXT: sw s8, 44(sp) -; ZDINX64-NEXT: sw s7, 40(sp) -; ZDINX64-NEXT: sw s6, 36(sp) -; ZDINX64-NEXT: sw s5, 32(sp) -; ZDINX64-NEXT: sw s4, 28(sp) -; ZDINX64-NEXT: sw s3, 24(sp) -; ZDINX64-NEXT: sw s2, 20(sp) -; ZDINX64-NEXT: sw s1, 16(sp) +; ZDINX64-NEXT: sw ra, 76(sp) +; ZDINX64-NEXT: sw s11, 72(sp) +; ZDINX64-NEXT: sw s10, 68(sp) +; ZDINX64-NEXT: sw s9, 64(sp) +; ZDINX64-NEXT: sw s8, 60(sp) +; ZDINX64-NEXT: sw s7, 56(sp) +; ZDINX64-NEXT: sw s6, 52(sp) +; ZDINX64-NEXT: sw s5, 48(sp) +; 
ZDINX64-NEXT: sw s4, 44(sp) +; ZDINX64-NEXT: sw s3, 40(sp) +; ZDINX64-NEXT: sw s2, 36(sp) +; ZDINX64-NEXT: sw s1, 32(sp) +; ZDINX64-NEXT: sw t6, 28(sp) +; ZDINX64-NEXT: sw t5, 24(sp) +; ZDINX64-NEXT: sw t4, 20(sp) +; ZDINX64-NEXT: sw t3, 16(sp) +; ZDINX64-NEXT: lw t3, 192(sp) +; ZDINX64-NEXT: lw t4, 200(sp) +; ZDINX64-NEXT: lw t5, 208(sp) +; ZDINX64-NEXT: lw t6, 216(sp) ; ZDINX64-NEXT: sw s0, 12(sp) -; ZDINX64-NEXT: sw t4, 8(sp) -; ZDINX64-NEXT: sw t5, 4(sp) -; ZDINX64-NEXT: sw t6, 0(sp) -; ZDINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: sw t2, 8(sp) +; ZDINX64-NEXT: sw t1, 4(sp) +; ZDINX64-NEXT: sw t0, 0(sp) ; ZDINX64-NEXT: call callee_float_32 -; ZDINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: addi sp, sp, 224 +; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: addi sp, sp, 192 ; ZDINX64-NEXT: ret %C = call fastcc float @callee_float_32(<32 x float> %A) ret float %C diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index b033c75eeadd8..27829f2b65759 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -2877,14 +2877,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV32IZFHMIN-LABEL: fsgnjx_f16: ; RV32IZFHMIN: # %bb.0: ; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV32IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV32IZFHMIN-NEXT: lui a1, 1048568 -; RV32IZFHMIN-NEXT: and a0, a0, a1 -; RV32IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV32IZFHMIN-NEXT: slli a1, a1, 17 -; RV32IZFHMIN-NEXT: srli a1, a1, 17 -; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32IZFHMIN-NEXT: lui a2, 1048568 +; RV32IZFHMIN-NEXT: and a1, a1, a2 +; RV32IZFHMIN-NEXT: slli a0, a0, 17 +; RV32IZFHMIN-NEXT: srli a0, a0, 17 +; RV32IZFHMIN-NEXT: or a0, a0, a1 ; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1 @@ -2895,14 +2894,13 @@ define half 
@fsgnjx_f16(half %x, half %y) nounwind { ; RV64IZFHMIN-LABEL: fsgnjx_f16: ; RV64IZFHMIN: # %bb.0: ; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV64IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV64IZFHMIN-NEXT: lui a1, 1048568 -; RV64IZFHMIN-NEXT: and a0, a0, a1 -; RV64IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV64IZFHMIN-NEXT: slli a1, a1, 49 -; RV64IZFHMIN-NEXT: srli a1, a1, 49 -; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64IZFHMIN-NEXT: lui a2, 1048568 +; RV64IZFHMIN-NEXT: and a1, a1, a2 +; RV64IZFHMIN-NEXT: slli a0, a0, 49 +; RV64IZFHMIN-NEXT: srli a0, a0, 49 +; RV64IZFHMIN-NEXT: or a0, a0, a1 ; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll index 170e71af09b49..727e03125176a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll @@ -40,8 +40,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFBFMIN-NEXT: csrr a0, vlenb ; RV32-ZFBFMIN-NEXT: slli a0, a0, 1 @@ -71,8 +70,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFBFMIN-NEXT: csrr a0, vlenb ; RV64-ZFBFMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b5d3e2cd776f2..bf2eb3ff0261a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -220,8 +220,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFHMIN-NEXT: lh a0, 0(a0) ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFHMIN-NEXT: csrr a0, vlenb ; RV32-ZFHMIN-NEXT: slli a0, a0, 1 @@ -251,8 +250,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFHMIN-NEXT: lh a0, 0(a0) ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFHMIN-NEXT: csrr a0, vlenb ; RV64-ZFHMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 5ab8eab091c2e..d665d23dec68a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -516,41 +516,33 @@ 
define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 @@ -564,41 +556,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and 
a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 @@ -628,41 +612,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: 
vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) @@ -678,41 +654,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) @@ -898,71 +866,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh 
fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -982,71 +934,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) 
; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1202,71 +1138,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, 
a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -1288,71 +1208,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1521,50 +1425,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a3, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: 
vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1580,50 +1476,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a3, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 ; 
ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1752,54 +1640,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a4, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: 
vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) @@ -1815,54 +1695,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a4, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) @@ -2051,77 +1923,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; 
ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2136,77 +1992,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) 
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; 
ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2360,78 +2200,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2447,78 +2271,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; 
ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2678,38 +2486,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: 
vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0) @@ -2730,38 +2530,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0) @@ -2885,38 +2677,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma @@ -2939,38 +2723,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
index df1bd889c1042..9c7ad239bcade 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
@@ -1,6 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x bfloat> @masked_load_nxv1bf16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 1 x bfloat> @llvm.masked.load.nxv1bf16(ptr %a, i32 2, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> undef)
+ ret <vscale x 1 x bfloat> %load
+}
+declare <vscale x 1 x bfloat> @llvm.masked.load.nxv1bf16(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x bfloat>)
 
 define <vscale x 1 x half> @masked_load_nxv1f16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv1f16:
@@ -35,6 +48,17 @@ define <vscale x 1 x double> @masked_load_nxv1f64(ptr %a, <vscale x 1 x i1> %mas
 }
 declare <vscale x 1 x double> @llvm.masked.load.nxv1f64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
 
+define <vscale x 2 x bfloat> @masked_load_nxv2bf16(ptr %a, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16(ptr %a, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+ ret <vscale x 2 x bfloat> %load
+}
+declare <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+
 define <vscale x 2 x half> @masked_load_nxv2f16(ptr %a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f16:
 ; CHECK: # %bb.0:
@@ -68,6 +92,17 @@ define <vscale x 2 x double> @masked_load_nxv2f64(ptr %a, <vscale x 2 x i1> %mas
 }
 declare <vscale x 2 x double> @llvm.masked.load.nxv2f64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
 
+define <vscale x 4 x bfloat> @masked_load_nxv4bf16(ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 4 x bfloat> @llvm.masked.load.nxv4bf16(ptr %a, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
+ ret <vscale x 4 x bfloat> %load
+}
+declare <vscale x 4 x bfloat> @llvm.masked.load.nxv4bf16(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x bfloat>)
+
 define <vscale x 4 x half> @masked_load_nxv4f16(ptr %a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4f16:
 ; CHECK: # %bb.0:
@@ -101,6 +136,17 @@ define <vscale x 4 x double> @masked_load_nxv4f64(ptr %a, <vscale x 4 x i1> %mas
 }
 declare <vscale x 4 x double> @llvm.masked.load.nxv4f64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
 
+define <vscale x 8 x bfloat> @masked_load_nxv8bf16(ptr %a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(ptr %a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
+ ret <vscale x 8 x bfloat> %load
+}
+declare <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+
 define <vscale x 8 x half> @masked_load_nxv8f16(ptr %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8f16:
 ; CHECK: # %bb.0:
@@ -134,6 +180,17 @@ define <vscale x 8 x double> @masked_load_nxv8f64(ptr %a, <vscale x 8 x i1> %mas
 }
 declare <vscale x 8 x double> @llvm.masked.load.nxv8f64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
 
+define <vscale x 16 x bfloat> @masked_load_nxv16bf16(ptr %a, <vscale x 16 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 16 x bfloat> @llvm.masked.load.nxv16bf16(ptr %a, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x bfloat> undef)
+ ret <vscale x 16 x bfloat> %load
+}
+declare <vscale x 16 x bfloat> @llvm.masked.load.nxv16bf16(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x bfloat>)
+
 define <vscale x 16 x half> @masked_load_nxv16f16(ptr %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16f16:
 ; CHECK: # %bb.0:
@@ -156,6 +213,17 @@ define <vscale x 16 x float> @masked_load_nxv16f32(ptr %a, <vscale x 16 x i1> %m
 }
 declare <vscale x 16 x float> @llvm.masked.load.nxv16f32(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x float>)
 
+define <vscale x 32 x bfloat> @masked_load_nxv32bf16(ptr %a, <vscale x 32 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 32 x bfloat> @llvm.masked.load.nxv32bf16(ptr %a, i32 2, <vscale x 32 x i1> %mask, <vscale x 32 x bfloat> undef)
+ ret <vscale x 32 x bfloat> %load
+}
+declare <vscale x 32 x bfloat> @llvm.masked.load.nxv32bf16(ptr, i32, <vscale x 32 x i1>, <vscale x 32 x bfloat>)
+
 define <vscale x 32 x half> @masked_load_nxv32f16(ptr %a, <vscale x 32 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv32f16:
 ; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll
index 17193aef1dff9..ddb56e0d979a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll
@@ -1,6 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define void @masked_store_nxv1bf16(<vscale x 1 x bfloat> %val, ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv1bf16.p0(<vscale x 1 x bfloat> %val, ptr %a, i32 2, <vscale x 1 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.nxv1bf16.p0(<vscale x 1 x bfloat>, ptr, i32, <vscale x 1 x i1>)
 
 define void @masked_store_nxv1f16(<vscale x 1 x half> %val, ptr %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv1f16:
@@ -35,6 +48,17 @@ define void @masked_store_nxv1f64(<vscale x 1 x double> %val, ptr %a, <vscale x
 }
 declare void @llvm.masked.store.nxv1f64.p0(<vscale x 1 x double>, ptr, i32, <vscale x 1 x i1>)
 
+define void @masked_store_nxv2bf16(<vscale x 2 x bfloat> %val, ptr %a, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv2bf16.p0(<vscale x 2 x bfloat> %val, ptr %a, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.nxv2bf16.p0(<vscale x 2 x bfloat>, ptr, i32, <vscale x 2 x i1>)
+
 define void @masked_store_nxv2f16(<vscale x 2 x half> %val, ptr %a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv2f16:
 ; CHECK: # %bb.0:
@@ -68,6 +92,17 @@ define void @masked_store_nxv2f64(<vscale x 2 x double> %val, ptr %a, <vscale x
 }
 declare void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double>, ptr, i32, <vscale x 2 x i1>)
 
+define void @masked_store_nxv4bf16(<vscale x 4 x bfloat> %val, ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv4bf16.p0(<vscale x 4 x bfloat> %val, ptr %a, i32 2, <vscale x 4 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.nxv4bf16.p0(<vscale x 4 x bfloat>, ptr, i32, <vscale x 4 x i1>)
+
 define void @masked_store_nxv4f16(<vscale x 4 x half> %val, ptr %a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv4f16:
 ; CHECK: # %bb.0:
@@ -101,6 +136,17 @@ define void @masked_store_nxv4f64(<vscale x 4 x double> %val, ptr %a, <vscale x
 }
 declare void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double>, ptr, i32, <vscale x 4 x i1>)
 
+define void @masked_store_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> %val, ptr %a, i32 2, <vscale x 8 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat>, ptr, i32, <vscale x 8 x i1>)
+
 define void @masked_store_nxv8f16(<vscale x 8 x half> %val, ptr %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv8f16:
 ; CHECK: # %bb.0:
@@ -134,6 +180,17 @@ define void @masked_store_nxv8f64(<vscale x 8 x double> %val, ptr %a, <vscale x
 }
 declare void @llvm.masked.store.nxv8f64.p0(<vscale x 8 x double>, ptr, i32, <vscale x 8 x i1>)
 
+define void @masked_store_nxv16bf16(<vscale x 16 x bfloat> %val, ptr %a, <vscale x 16 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv16bf16.p0(<vscale x 16 x bfloat> %val, ptr %a, i32 2, <vscale x 16 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.nxv16bf16.p0(<vscale x 16 x bfloat>, ptr, i32, <vscale x 16 x i1>)
+
 define void @masked_store_nxv16f16(<vscale x 16 x half> %val, ptr %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv16f16:
 ; CHECK: # %bb.0:
@@ -156,6 +213,17 @@ define void @masked_store_nxv16f32(<vscale x 16 x float> %val, ptr %a, <vscale
 }
 declare void @llvm.masked.store.nxv16f32.p0(<vscale x 16 x float>, ptr, i32, <vscale x 16 x i1>)
 
+define void @masked_store_nxv32bf16(<vscale x 32 x bfloat> %val, ptr %a, <vscale x 32 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv32bf16.p0(<vscale x 32 x bfloat> %val, ptr %a, i32 2, <vscale x 32 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.nxv32bf16.p0(<vscale x 32 x bfloat>, ptr, i32, <vscale x 32 x i1>)
+
 define void @masked_store_nxv32f16(<vscale x 32 x half> %val, ptr %a, <vscale x 32 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv32f16:
 ; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index be37be06f0e77..189ba08dddc7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -1,8 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
 
@@ -1257,6 +1265,206 @@ define void @mgather_nxv16i64(<vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptr
 ret void
 }
 
+declare <vscale x 1 x bfloat> @llvm.masked.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x bfloat>)
+
+define <vscale x 1 x bfloat> @mgather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x bfloat> %passthru) {
+; RV32-LABEL: mgather_nxv1bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv1bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.masked.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x bfloat> %passthru)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+
+define <vscale x 2 x bfloat> @mgather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x bfloat> %passthru) {
+; RV32-LABEL: mgather_nxv2bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv2bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
+; RV64-NEXT: vmv1r.v v8, v10
+; RV64-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x bfloat> %passthru)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x bfloat>)
+
+define <vscale x 4 x bfloat> @mgather_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_nxv4bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv4bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
+; RV64-NEXT: vmv.v.v v8, v12
+; RV64-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x bfloat> %passthru)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @mgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_truemask_nxv4bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; RV32-NEXT: vluxei32.v v10, (zero), v8
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_truemask_nxv4bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; RV64-NEXT: vluxei64.v v12, (zero), v8
+; RV64-NEXT: vmv.v.v v8, v12
+; RV64-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x bfloat> %passthru)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @mgather_falsemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_falsemask_nxv4bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_falsemask_nxv4bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vmv1r.v v8, v12
+; RV64-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x bfloat> %passthru)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+
+define <vscale x 8 x bfloat> @mgather_nxv8bf16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_nxv8bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_nxv8bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
+; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @mgather_baseidx_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT: vsext.vf4 v12, v8
+; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vsext.vf8 v16, v8
+; RV64-NEXT: vadd.vv v16, v16, v16
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
+ %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @mgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT: vsext.vf4 v12, v8
+; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vsext.vf8 v16, v8
+; RV64-NEXT: vadd.vv v16, v16, v16
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
+ %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
+ %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @mgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
+; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v12, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
+ %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @mgather_baseidx_nxv8bf16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_nxv8bf16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu
+; RV32-NEXT: vwadd.vv v12, v8, v8
+; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mgather_baseidx_nxv8bf16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vsext.vf4 v16, v8
+; RV64-NEXT: vadd.vv v16, v16, v16
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
+ %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
+ ret <vscale x 8 x bfloat> %v
+}
+
 declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index
9bfa0f31dc3a6..29db67b4b0a41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare void @llvm.masked.scatter.nxv1i8.nxv1p0(, , i32, ) @@ -967,6 +975,184 @@ define void @mscatter_baseidx_nxv8i64( %val, ptr %base, , , i32, ) + +define void @mscatter_nxv1bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1bf16.nxv1p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(, , i32, ) + +define void @mscatter_nxv2bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2bf16.nxv2p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(, , i32, ) + +define void @mscatter_nxv4bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_truemask_nxv4bf16( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, splat (i1 1)) + ret void +} + +define void @mscatter_falsemask_nxv4bf16( %val, %ptrs) { +; CHECK-LABEL: mscatter_falsemask_nxv4bf16: 
+; CHECK: # %bb.0: +; CHECK-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, zeroinitializer) + ret void +} + +declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(, , i32, ) + +define void @mscatter_nxv8bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void 
@llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + declare void @llvm.masked.scatter.nxv1f16.nxv1p0(, , i32, ) define void @mscatter_nxv1f16( %val, %ptrs, %m) { diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index d1c98f828e76d..abf89361cdea5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5424,8 +5424,8 @@ for.cond.cleanup: ; preds = %vector.body ret void } -define void @sink_splat_select(ptr nocapture %a, i32 signext %x) { -; CHECK-LABEL: sink_splat_select: +define void @sink_splat_select_op1(ptr nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_select_op1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a2, a0, a2 @@ -5460,3 +5460,41 @@ vector.body: ; preds = %vector.body, %entry for.cond.cleanup: ; preds = %vector.body ret void } + +define void @sink_splat_select_op2(ptr nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_select_op2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: .LBB118_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vmseq.vx v0, v9, a2 +; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0 +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bne a0, a1, .LBB118_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, ptr %a, i64 %index + %load = load <4 x i32>, ptr %0, align 4 + %cond = icmp eq <4 x i32> %load, splat (i32 42) + %1 = select <4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat + store <4 x i32> %1, ptr %0, align 4 + %index.next = add nuw i64 %index, 4 + %2 = icmp eq i64 %index.next, 1024 + br i1 %2, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll new file mode 100644 index 0000000000000..4771d7fe6ec92 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s + +define i64 @i64( %v, i1 %c) { +; RV32-LABEL: i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: #APP +; RV32-NEXT: #NO_APP +; RV32-NEXT: beqz a0, .LBB0_2 +; RV32-NEXT: # %bb.1: # %truebb +; RV32-NEXT: li 
a0, 32 +; RV32-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v8, v9, a0 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: j .LBB0_3 +; RV32-NEXT: .LBB0_2: # %falsebb +; RV32-NEXT: li a1, 0 +; RV32-NEXT: .LBB0_3: # %falsebb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: #APP +; RV64-NEXT: #NO_APP +; RV64-NEXT: beqz a0, .LBB0_2 +; RV64-NEXT: # %bb.1: # %truebb +; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: .LBB0_2: # %falsebb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add sp, sp, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i64 %x +falsebb: + ret i64 0 +} + +define i32 @i32( %v, i1 %c) { +; CHECK-LABEL: i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB1_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i32 %x +falsebb: + ret i32 0 +} + +define i16 @i16( %v, i1 %c) { +; CHECK-LABEL: i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lh a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB2_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 
+; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i16 %x +falsebb: + ret i16 0 +} + +define i8 @i8( %v, i1 %c) { +; CHECK-LABEL: i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lb a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB3_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i8 %x +falsebb: + ret i8 0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 14976f21b7dbb..87ff1859a4d2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-ZVFH,CHECK-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-ZVFH,CHECK-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH,CHECK-NO-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64 +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH,CHECK-NO-OPT-RV64 +; RUN: llc -mtriple=riscv32 
-mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-ZVFHMIN,CHECK-OPT-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-ZVFHMIN,CHECK-OPT-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN,CHECK-NO-OPT-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \
+; RUN: -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN,CHECK-NO-OPT-RV64
 
 declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)
 
@@ -352,6 +364,74 @@ define <vscale x 8 x i64> @strided_vpload_nxv8i64(ptr %ptr, i32 signext %stride,
   ret <vscale x 8 x i64> %load
 }
 
+declare <vscale x 1 x bfloat> @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @strided_vpload_nxv1bf16(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 1 x bfloat> @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x bfloat> %load
+}
+
+declare <vscale x 2 x bfloat> @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @strided_vpload_nxv2bf16(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x bfloat> @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x bfloat> %load
+}
+
+define <vscale x 2 x bfloat> @strided_vpload_nxv2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_nxv2bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x bfloat> @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 2 x bfloat> %load
+}
+
+declare <vscale x 4 x bfloat> @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr, i32, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @strided_vpload_nxv4bf16(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 4 x bfloat> @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 signext %stride, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x bfloat> %load
+}
+
+define <vscale x 4 x bfloat> @strided_vpload_nxv4bf16_unit_stride(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_nxv4bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 4 x bfloat> @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x bfloat> %load
+}
+
+declare <vscale x 8 x bfloat> @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr, i32, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @strided_vpload_nxv8bf16(ptr %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; 
CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + declare @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr, i32, , i32) define @strided_vpload_nxv1f16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { @@ -589,10 +669,10 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV32-NEXT: sltu a5, a3, a2 ; CHECK-RV32-NEXT: addi a5, a5, -1 ; CHECK-RV32-NEXT: and a2, a5, a2 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB50_2 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB56_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: .LBB50_2: +; CHECK-RV32-NEXT: .LBB56_2: ; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -661,10 +741,10 @@ define @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV64-NEXT: sltu a5, a2, a3 ; CHECK-RV64-NEXT: addi a5, a5, -1 ; CHECK-RV64-NEXT: and a3, a5, a3 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB50_2 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB56_2 ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a2, a4 -; CHECK-RV64-NEXT: .LBB50_2: +; CHECK-RV64-NEXT: .LBB56_2: ; CHECK-RV64-NEXT: mul a4, a2, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -689,19 +769,19 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) { ; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT: ret ; -; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16: -; CHECK-NO-OPT: # %bb.0: -; CHECK-NO-OPT-NEXT: flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5 -; CHECK-NO-OPT-NEXT: ret +; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_nxv1f16: +; CHECK-NO-OPT-ZVFH: # %bb.0: +; CHECK-NO-OPT-ZVFH-NEXT: flh fa5, 0(a0) +; CHECK-NO-OPT-ZVFH-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NO-OPT-ZVFH-NEXT: vfmv.v.f v8, fa5 +; CHECK-NO-OPT-ZVFH-NEXT: ret +; +; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_nxv1f16: +; CHECK-NO-OPT-ZVFHMIN: # %bb.0: +; CHECK-NO-OPT-ZVFHMIN-NEXT: lh a0, 0(a0) +; CHECK-NO-OPT-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NO-OPT-ZVFHMIN-NEXT: vmv.v.x v8, a0 +; CHECK-NO-OPT-ZVFHMIN-NEXT: ret %load = call @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 4) ret %load } @@ -854,10 +941,10 @@ define @zero_strided_vadd_nxv16i64( %v, p ; CHECK-RV32-NEXT: and a3, a4, a3 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero -; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2 +; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a2, a1 -; CHECK-RV32-NEXT: .LBB55_2: +; CHECK-RV32-NEXT: .LBB61_2: ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero ; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma @@ -908,3 +995,6 @@ define @zero_strided_vadd_nxv1p0( %v, ptr % %load = call @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 %vscale) ret %load } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line:
+; CHECK-OPT-ZVFH: {{.*}}
+; CHECK-OPT-ZVFHMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll
index e8704b35f31f7..abdf9ab09bb9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll
@@ -1,8 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s \
 ; RUN: -check-prefixes=CHECK,CHECK-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s \
 ; RUN: -check-prefixes=CHECK,CHECK-RV64
 
@@ -280,6 +286,64 @@ define void @strided_vpstore_nxv8i64(<vscale x 8 x i64> %val, ptr %ptr, i32 sign
   ret void
 }
 
+declare void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32(<vscale x 1 x bfloat>, ptr, i32, <vscale x 1 x i1>, i32)
+
+define void @strided_vpstore_nxv1bf16(<vscale x 1 x bfloat> %val, ptr %ptr, i32 signext %strided, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32(<vscale x 1 x bfloat> %val, ptr %ptr, i32 %strided, <vscale x 1 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32(<vscale x 2 x bfloat>, ptr, i32, <vscale x 2 x i1>, i32)
+
+define void @strided_vpstore_nxv2bf16(<vscale x 2 x bfloat> %val, ptr %ptr, i32 signext %strided, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32(<vscale x 2 x bfloat> %val, ptr %ptr, i32 %strided, <vscale x 2 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32(<vscale x 4 x bfloat>, ptr, i32, <vscale x 4 x i1>, i32)
+
+define void @strided_vpstore_nxv4bf16(<vscale x 4 x bfloat> %val, ptr %ptr, i32 signext %strided, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32(<vscale x 4 x bfloat> %val, ptr %ptr, i32 %strided, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_nxv4bf16_unit_stride(<vscale x 4 x bfloat> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv4bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32(<vscale x 4 x bfloat> %val, ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32(<vscale x 8 x bfloat>, ptr, i32, <vscale x 8 x i1>, i32)
+
+define void @strided_vpstore_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %ptr, i32 signext %strided, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void 
@llvm.experimental.vp.strided.store.nxv8bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + declare void @llvm.experimental.vp.strided.store.nxv1f16.p0.i32(, ptr, i32, , i32) define void @strided_vpstore_nxv1f16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { @@ -493,10 +557,10 @@ define void @strided_store_nxv16f64( %v, ptr %ptr, i32 sig ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB41_2 +; CHECK-NEXT: bltu a2, a3, .LBB46_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB41_2: +; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: sub a5, a2, a3 @@ -520,10 +584,10 @@ define void @strided_store_nxv16f64_allones_mask( %v, ptr ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB42_2 +; CHECK-NEXT: bltu a2, a3, .LBB47_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB42_2: +; CHECK-NEXT: .LBB47_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1 ; CHECK-NEXT: sub a3, a2, a3 @@ -549,15 +613,15 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: slli a6, a4, 1 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: bltu a3, a6, .LBB43_2 +; CHECK-NEXT: bltu a3, a6, .LBB48_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a6 -; CHECK-NEXT: .LBB43_2: +; CHECK-NEXT: .LBB48_2: ; CHECK-NEXT: mv a7, a5 -; CHECK-NEXT: bltu a5, a4, .LBB43_4 +; CHECK-NEXT: bltu a5, a4, .LBB48_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a4 -; CHECK-NEXT: .LBB43_4: +; CHECK-NEXT: .LBB48_4: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr t0, vlenb @@ -585,10 +649,10 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t -; CHECK-NEXT: bltu a0, a4, .LBB43_6 +; CHECK-NEXT: bltu a0, a4, .LBB48_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 -; CHECK-NEXT: .LBB43_6: +; CHECK-NEXT: .LBB48_6: ; CHECK-NEXT: mul a3, a5, a2 ; CHECK-NEXT: srli a4, a4, 2 ; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll index 5fbdefda9f402..0da05c1bd4364 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN define @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_nxv1i8: @@ -270,62 +274,254 @@ define @vp_splat_nxv8i64(i64 %val, %m, i32 ret %splat } +define @vp_splat_nxv1bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv1bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv1bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv2bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv2bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv4bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv4bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv8bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv8bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv16bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv16bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv32bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv32bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = 
call @llvm.experimental.vp.splat.nxv32bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + define @vp_splat_nxv1f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv1f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv2f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv4f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv8f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: 
vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv16f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv32f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) ret %splat } @@ -452,10 +648,10 @@ define @vp_splat_nxv32i32(i32 %val, %m, i ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB39_2 +; CHECK-NEXT: bltu a1, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB39_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c0d7ecf74956b..84c8321b5b934 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare @llvm.vp.gather.nxv1i8.nxv1p0(, , i32) @@ -1237,6 +1241,195 @@ define @vpgather_baseidx_nxv8i64(ptr %base, %v } +declare @llvm.vp.gather.nxv1bf16.nxv1p0(, , i32) + +define @vpgather_nxv1bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vluxei64.v v9, 
(zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv1bf16.nxv1p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv2bf16.nxv2p0(, , i32) + +define @vpgather_nxv2bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv2bf16.nxv2p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv4bf16.nxv4p0(, , i32) + +define @vpgather_nxv4bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_truemask_nxv4bf16( %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, splat (i1 1), i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv8bf16.nxv8p0(, , i32) + +define @vpgather_nxv8bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: 
vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v10, v8, v8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + declare @llvm.vp.gather.nxv1f16.nxv1p0(, , i32) define @vpgather_nxv1f16( %ptrs, %m, i32 zeroext %evl) { @@ -2275,10 +2468,10 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: bltu a1, a2, .LBB113_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB104_2: +; RV32-NEXT: .LBB113_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2413,10 +2606,10 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 -; RV64-NEXT: bltu a1, a2, .LBB104_2 +; RV64-NEXT: bltu a1, a2, .LBB113_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB104_2: +; RV64-NEXT: .LBB113_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2444,10 +2637,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: 
bltu a1, a2, .LBB105_2
+; RV32-NEXT:    bltu a1, a2, .LBB114_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:  .LBB105_2:
+; RV32-NEXT:  .LBB114_2:
 ; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
@@ -2469,10 +2662,10 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
 ; RV64-NEXT:    and a3, a4, a3
 ; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v16, (a0), v28, v0.t
-; RV64-NEXT:    bltu a1, a2, .LBB105_2
+; RV64-NEXT:    bltu a1, a2, .LBB114_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a1, a2
-; RV64-NEXT:  .LBB105_2:
+; RV64-NEXT:  .LBB114_2:
 ; RV64-NEXT:    vmv1r.v v0, v12
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index d4f117fad37ee..0a98b672fb19c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -1,7 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)
 
@@ -269,6 +273,64 @@ define <vscale x 8 x i64> @vpload_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 ze
   ret <vscale x 8 x i64> %load
 }
 
+declare <vscale x 1 x bfloat> @llvm.vp.load.nxv1bf16.p0(ptr, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vpload_nxv1bf16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 1 x bfloat> @llvm.vp.load.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x bfloat> %load
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.load.nxv2bf16.p0(ptr, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vpload_nxv2bf16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x bfloat> @llvm.vp.load.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x bfloat> %load
+}
+
+define <vscale x 2 x bfloat> @vpload_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv2bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x bfloat> @llvm.vp.load.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 2 x bfloat> %load
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.load.nxv4bf16.p0(ptr, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vpload_nxv4bf16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 4 x bfloat> @llvm.vp.load.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x bfloat> %load
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vpload_nxv8bf16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpload_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x bfloat> %load
+}
+
 declare <vscale x 1 x half> @llvm.vp.load.nxv1f16.p0(ptr, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x half> 
@vpload_nxv1f16(ptr %ptr, %m, i32 zeroext %evl) { @@ -461,10 +523,10 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: bltu a1, a2, .LBB38_2 +; CHECK-NEXT: bltu a1, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB38_2: +; CHECK-NEXT: .LBB43_2: ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t @@ -491,10 +553,10 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, , , , i32) @@ -1106,6 +1110,185 @@ define void @vpscatter_baseidx_nxv8i64( %val, ptr %base, , , , i32) + +define void @vpscatter_nxv1bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv1bf16.nxv1p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(, , , i32) + +define void @vpscatter_nxv2bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv2bf16.nxv2p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(, , , i32) + +define void @vpscatter_nxv4bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_truemask_nxv4bf16( %val, %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(, , , i32) + +define void @vpscatter_nxv8bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: 
vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> + %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v12, v10, v10 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: ret + %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> + %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl) + ret void +} + declare void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32) define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) { @@ -2115,10 +2298,10 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, ; RV32-NEXT: vl8re32.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB99_2
; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: .LBB99_2: +; RV32-NEXT: .LBB108_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: sub a2, a1, a0 @@ -2148,10 +2331,10 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: mv a0, a2 -; RV64-NEXT: bltu a2, a1, .LBB99_2 +; RV64-NEXT: bltu a2, a1, .LBB108_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB99_2: +; RV64-NEXT: .LBB108_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: sub a0, a2, a1 @@ -2183,10 +2366,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB100_2 +; RV32-NEXT: bltu a2, a1, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB100_2: +; RV32-NEXT: .LBB109_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2223,10 +2406,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB100_2 +; RV64-NEXT: bltu a2, a1, .LBB109_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB100_2: +; RV64-NEXT: .LBB109_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 @@ -2264,10 +2447,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB101_2 +; RV32-NEXT: bltu a2, a1, .LBB110_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB101_2: +; RV32-NEXT: .LBB110_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2304,10 +2487,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB101_2 +; RV64-NEXT: bltu a2, a1, .LBB110_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB101_2: +; RV64-NEXT: .LBB110_2: ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -2346,10 +2529,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB102_2 +; RV32-NEXT: bltu a2, a1, .LBB111_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB102_2: +; RV32-NEXT: .LBB111_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2371,10 +2554,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB102_2 +; RV64-NEXT: bltu a2, a1, .LBB111_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB102_2: +; RV64-NEXT: .LBB111_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 015d7645aaa29..d935e52149d20 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.vp.store.nxv1i8.p0(, ptr, , i32) @@ -208,6 +212,54 @@ define void @vpstore_nxv8i64( %val, ptr %ptr, , ptr, , i32) + +define void @vpstore_nxv1bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2bf16.p0(, ptr, , i32) + +define void @vpstore_nxv2bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4bf16.p0(, ptr, , i32) + +define void @vpstore_nxv4bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8bf16.p0(, ptr, , i32) + +define void @vpstore_nxv8bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + declare void @llvm.vp.store.nxv1f16.p0(, ptr, , i32) define void @vpstore_nxv1f16( %val, ptr %ptr, %m, i32 zeroext %evl) { @@ -369,10 +421,10 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, @vsplat_nxv8bf16(bfloat %f) { +; NOZFMIN-LABEL: vsplat_nxv8bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a0, fa0 +; NOZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vsplat_nxv8bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a0, fa0 +; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a0 +; ZFMIN-NEXT: ret + %head = insertelement poison, bfloat %f, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + +define @vsplat_zero_nxv8bf16() { +; CHECK-LABEL: vsplat_zero_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + ret splat (bfloat zeroinitializer) +} define @vsplat_nxv8f16(half %f) { ; ZVFH-LABEL: vsplat_nxv8f16: @@ -25,10 +43,17 @@ define @vsplat_nxv8f16(half %f) { ; ; ZVFHMIN-LABEL: vsplat_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: fmv.x.w 
a0, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vsplat_nxv8f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a0, fa0 +; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a0 +; ZFMIN-NEXT: ret %head = insertelement poison, half %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer ret %splat @@ -83,20 +108,26 @@ define @vsplat_zero_nxv8f64() { ret splat (double zeroinitializer) } -; Test that we fold this to a vlse with 0 stride. define @vsplat_load_nxv8f32(ptr %ptr) { -; OPTIMIZED-LABEL: vsplat_load_nxv8f32: -; OPTIMIZED: # %bb.0: -; OPTIMIZED-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; OPTIMIZED-NEXT: vlse32.v v8, (a0), zero -; OPTIMIZED-NEXT: ret -; -; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32: -; NOT-OPTIMIZED: # %bb.0: -; NOT-OPTIMIZED-NEXT: flw fa5, 0(a0) -; NOT-OPTIMIZED-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; NOT-OPTIMIZED-NEXT: vfmv.v.f v8, fa5 -; NOT-OPTIMIZED-NEXT: ret +; CHECK-LABEL: vsplat_load_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: flw fa5, 0(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa5 +; CHECK-NEXT: ret + %f = load float, ptr %ptr + %head = insertelement poison, float %f, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + +; Test that we fold this to a vlse with 0 stride. +define @vsplat_load_nxv8f32_optimized(ptr %ptr) "target-features"="+optimized-zero-stride-load" { +; CHECK-LABEL: vsplat_load_nxv8f32_optimized: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vlse32.v v8, (a0), zero +; CHECK-NEXT: ret %f = load float, ptr %ptr %head = insertelement poison, float %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll index 323afec7f35f8..f8207c56a5656 100644 --- a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll +++ b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll @@ -1,7 +1,7 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; CHECK-SPIRV: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-SPIRV-DAG: %[[#MemScope_Device:]] = OpConstant %[[#Int]] 1 +; CHECK-SPIRV-DAG: %[[#MemScope_CrossDevice:]] = OpConstant %[[#Int]] 0 ; CHECK-SPIRV-DAG: %[[#MemSemEqual_SeqCst:]] = OpConstant %[[#Int]] 16 ; CHECK-SPIRV-DAG: %[[#MemSemUnequal_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-SPIRV-DAG: %[[#Constant_456:]] = OpConstant %[[#Int]] 456 @@ -11,7 +11,7 @@ ; CHECK-SPIRV-DAG: %[[#UndefStruct:]] = OpUndef %[[#Struct]] ; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr:]] -; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_Device]] +; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_CrossDevice]] ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator:]] ; CHECK-SPIRV: %[[#Success:]] = OpIEqual %[[#]] %[[#Res]] %[[#Comparator]] ; CHECK-SPIRV: %[[#Composite_0:]] = OpCompositeInsert %[[#Struct]] %[[#Res]] %[[#UndefStruct]] 0 @@ -34,7 +34,7 @@ cmpxchg.continue: ; preds = %cmpxchg.store_expec ret void } -; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_Device]] +; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_CrossDevice]] ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Constant_456]] 
%[[#Constant_128]] ; CHECK-SPIRV: %[[#Success_1:]] = OpIEqual %[[#]] %[[#Res_1]] %[[#Constant_128]] ; CHECK-SPIRV: %[[#Composite:]] = OpCompositeInsert %[[#Struct]] %[[#Res_1]] %[[#UndefStruct]] 0 diff --git a/llvm/test/CodeGen/SPIRV/atomicrmw.ll b/llvm/test/CodeGen/SPIRV/atomicrmw.ll index 5f95a974ba671..07576056117cb 100644 --- a/llvm/test/CodeGen/SPIRV/atomicrmw.ll +++ b/llvm/test/CodeGen/SPIRV/atomicrmw.ll @@ -5,8 +5,7 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1{{$}} -; CHECK-DAG: %[[#MemSem_Relaxed:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Scope_CrossDevice:]] = OpConstant %[[#Int]] 0{{$}} ; CHECK-DAG: %[[#MemSem_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-DAG: %[[#MemSem_Release:]] = OpConstant %[[#Int]] 4{{$}} ; CHECK-DAG: %[[#MemSem_AcquireRelease:]] = OpConstant %[[#Int]] 8 @@ -25,37 +24,37 @@ define dso_local spir_func void @test_atomicrmw() local_unnamed_addr { entry: %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst -; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_CrossDevice]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %{{.+}} %[[#Value]] %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Acquire]] %[[#Value]] %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Release]] %[[#Value]] %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst -; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %{{.*}} %[[#Value]] %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: 
%[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Acquire]] %[[#Value]] %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Release]] %[[#Value]] %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll index 14035a68c81aa..c2ed2f8f62fc8 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll @@ -10,13 +10,14 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP64]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll index d34811496e5a1..075e63ea6de61 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll @@ -10,15 +10,16 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] +; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 ; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[ScopeWorkgroup:[0-9]+]] = OpConstant %[[TyInt32]] 2 -; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 ; CHECK-DAG: %[[WorkgroupMemory:[0-9]+]] = OpConstant %[[TyInt32]] 512 ; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} 
%[[TyFP32]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeWorkgroup]] %[[WorkgroupMemory]] %[[Const42]] ; CHECK: %[[Neg42:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll index 7da99411ae530..2c938409846d3 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll @@ -13,13 +13,14 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP16]] 20800 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP16]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll index a2d0a594c861d..fdc05f4eac06b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer 
{{[a-zA-Z]+}} %[[TyFP64]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] -; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll index 896b7acc1c87b..a7ff448a98b98 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] -; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll index b3f48711707a1..d5576d1911a8b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0 ; CHECK-DAG: %[[ConstHalf:[0-9]+]] = OpConstant %[[TyFP16]] 20800 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] -; 
CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] +; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]] +; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]] ; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] ; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] diff --git a/llvm/test/CodeGen/SPIRV/fence.ll b/llvm/test/CodeGen/SPIRV/fence.ll index 5da58667f24f2..c7496c15f2c95 100644 --- a/llvm/test/CodeGen/SPIRV/fence.ll +++ b/llvm/test/CodeGen/SPIRV/fence.ll @@ -3,16 +3,16 @@ ; CHECK-DAG: OpName %[[#GetScope:]] "_Z8getScopev" ; CHECK-DAG: %[[#Long:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#ScopeDevice:]] = OpConstant %[[#Long]] 1 ; CHECK-DAG: %[[#WrkGrpConst2:]] = OpConstant %[[#Long]] 2 -; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3 +; CHECK-DAG: %[[#ScopeAllSvmDevices:]] = OpConstantNull %[[#Long]] ; CHECK-DAG: %[[#InvocationConst4:]] = OpConstant %[[#Long]] 4 ; CHECK-DAG: %[[#Const8:]] = OpConstant %[[#Long]] 8 ; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Long]] 16 +; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3 ; CHECK-DAG: %[[#Const912:]] = OpConstant %[[#Long]] 912 -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#WrkGrpConst2]] -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#InvocationConst4]] -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#Const8]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#WrkGrpConst2]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#InvocationConst4]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#Const8]] ; CHECK: OpMemoryBarrier %[[#InvocationConst4]] %[[#Const16]] ; CHECK: OpMemoryBarrier %[[#WrkGrpConst2]] %[[#InvocationConst4]] ; CHECK: OpFunctionEnd diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll index 9469d24b20af2..54d0843cbf234 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll @@ -9,7 +9,7 @@ ; CHECK-DAG: %[[#LongTy:]] = OpTypeInt 64 0 ; CHECK-DAG: %[[#PtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#LongTy]] ; CHECK-DAG: %[[#IntTy:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 1 +; CHECK-DAG: %[[#Scope:]] = OpConstantNull %[[#IntTy]] ; CHECK-DAG: %[[#MemSem:]] = OpConstant %[[#IntTy]] 8 ; CHECK-DAG: %[[#PtrPtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#PtrLongTy]] diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll index 8c5c036351d97..f4e7b128f77a3 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll @@ -18,16 +18,15 @@ ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] ; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0 ; CHECK-DAG: [[PtrI64Ty:%.*]] = OpTypePointer Generic [[I64Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +; CHECK-DAG: [[CROSSDEVICESCOPE:%.*]] = OpConstantNull [[I32Ty]] +; CHECK-DAG: [[DEVICESCOPE:%.*]] = OpConstant [[I32Ty]] 1 ;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0 -; CHECK-DAG: [[RELAXED:%.*]] = OpConstantNull [[I32Ty]] ; CHECK: [[ADD]] = OpFunction [[I32Ty]] ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; 
CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -39,7 +38,7 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -51,7 +50,7 @@ define i32 @test_sub(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -63,7 +62,7 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -75,7 +74,7 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -87,7 +86,7 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -99,7 +98,7 @@ define i32 @test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -111,7 +110,7 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue 
[[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -123,7 +122,7 @@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { @@ -135,10 +134,10 @@ define i32 @test_xor(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[Arg1:%.*]] = OpFunctionParameter [[PtrI64Ty]] ; CHECK-NEXT: [[Arg2:%.*]] = OpFunctionParameter [[I64Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] +; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) { diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll index 07d1a5cf662ec..4d5aca6d404de 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll @@ -13,8 +13,8 @@ ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] ;; "acq_rel" maps to the constant 8 ; CHECK-DAG: [[ACQREL:%.*]] = OpConstant [[I32Ty]] 8 diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll index 4078ffe1a10b8..9fd3d8e95b5f1 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll @@ -13,8 +13,8 @@ ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] ;; "sequentially consistent" maps to constant 16 ; CHECK-DAG: [[SEQ:%.*]] = OpConstant [[I32Ty]] 16 diff --git a/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll new file mode 100644 index 0000000000000..130db18534832 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll @@ -0,0 +1,163 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - 
-filetype=obj | spirv-val %} + +; CHECK: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#Scope_CrossDevice:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Value:]] = OpConstant %[[#Int]] 42 +; CHECK-DAG: %[[#FPValue:]] = OpConstant %[[#Float]] 42 +; CHECK-DAG: %[[#Scope_Invocation:]] = OpConstant %[[#Int]] 4 +; CHECK-DAG: %[[#MemSem_SeqCst:]] = OpConstant %[[#Int]] 16 +; CHECK-DAG: %[[#Scope_Subgroup:]] = OpConstant %[[#Int]] 3 +; CHECK-DAG: %[[#Scope_Workgroup:]] = OpConstant %[[#Int]] 2 +; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1 +; CHECK-DAG: %[[#PointerType:]] = OpTypePointer CrossWorkgroup %[[#Int]] +; CHECK-DAG: %[[#FPPointerType:]] = OpTypePointer CrossWorkgroup %[[#Float]] +; CHECK-DAG: %[[#Pointer:]] = OpVariable %[[#PointerType]] CrossWorkgroup +; CHECK-DAG: %[[#FPPointer:]] = OpVariable %[[#FPPointerType]] CrossWorkgroup + +@ui = common dso_local addrspace(1) global i32 0, align 4 +@f = common dso_local local_unnamed_addr addrspace(1) global float 0.000000e+00, align 4 + +define dso_local spir_func void @test_singlethread_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_subgroup_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 
syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_workgroup_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd 
%[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_device_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_all_svm_devices_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] 
%[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll new file mode 100644 index 0000000000000..d568af47dbafd --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z16 < %s | FileCheck %s +; +; Check that DAGCombiner doesn't crash in SystemZ combineExtract() +; when handling EXTRACT_VECTOR_ELT with a vector of i1 elements.
+ +define i32 @fun(i32 %arg) { +; CHECK-LABEL: fun: +entry: + %cc = icmp eq i32 %arg, 0 + br label %loop + +loop: + %P = phi <128 x i1> [ zeroinitializer, %entry ], [ bitcast (<2 x i64> to <128 x i1>), %loop ] + br i1 %cc, label %exit, label %loop + +exit: + %E = extractelement <128 x i1> %P, i64 0 + %Res = zext i1 %E to i32 + ret i32 %Res +} diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 5a051a9c499e4..332fbf7188af8 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -406,13 +406,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: subq $72, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-SSE-NEXT: pextrw $7, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $7, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $6, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $6, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -420,13 +422,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $5, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $5, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $4, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $4, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -436,13 +440,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $3, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $3, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $2, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $2, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -450,14 +456,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $1, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $1, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: movd %xmm0, %eax -; CHECK-SSE-NEXT: movzwl %ax, %edi +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -476,13 +483,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: subq $72, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -490,13 +499,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -506,13 +517,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: 
vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -520,14 +533,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vmovd %xmm0, %eax -; CHECK-AVX2-NEXT: movzwl %ax, %edi +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -546,7 +560,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: subq $72, %rsp ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -554,7 +569,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -564,7 +580,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -572,7 +589,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -584,7 +602,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -592,7 +611,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -602,7 +622,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -611,7 +632,7 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax -; CHECK-AVX512F-NEXT: movzwl %ax, %edi +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll new file mode 100644 index 0000000000000..fae3db6ad0afa --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64 +; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512 + +define i64 @test_double_to_ui64(double %x) { +; SDAG-X64-LABEL: test_double_to_ui64: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: cvttsd2si %xmm0, %rcx +; SDAG-X64-NEXT: movq %rcx, %rdx +; SDAG-X64-NEXT: sarq $63, %rdx +; SDAG-X64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SDAG-X64-NEXT: cvttsd2si %xmm0, %rax +; SDAG-X64-NEXT: andq %rdx, %rax +; SDAG-X64-NEXT: orq %rcx, %rax +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_double_to_ui64: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvttsd2si %xmm0, %rcx +; GISEL-X64-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] +; 
GISEL-X64-NEXT: movapd %xmm0, %xmm2 +; GISEL-X64-NEXT: subsd %xmm1, %xmm2 +; GISEL-X64-NEXT: cvttsd2si %xmm2, %rdx +; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; GISEL-X64-NEXT: xorq %rdx, %rax +; GISEL-X64-NEXT: xorl %edx, %edx +; GISEL-X64-NEXT: ucomisd %xmm1, %xmm0 +; GISEL-X64-NEXT: setb %dl +; GISEL-X64-NEXT: andl $1, %edx +; GISEL-X64-NEXT: cmovneq %rcx, %rax +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_ui64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i64 + ret i64 %conv +} + +define i32 @test_double_to_ui32(double %x) { +; X64-LABEL: test_double_to_ui32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_ui32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i32 + ret i32 %conv +} + +define zeroext i16 @test_double_to_ui16(double %x) { +; X64-LABEL: test_double_to_ui16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_double_to_ui16: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_double_to_ui16: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i16 + ret i16 %conv +} + +define zeroext i8 @test_double_to_ui8(double %x) { +; X64-LABEL: test_double_to_ui8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_double_to_ui8: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_double_to_ui8: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i8 + ret i8 %conv +} + +define i64 @test_float_to_ui64(float %x) { +; SDAG-X64-LABEL: test_float_to_ui64: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: cvttss2si %xmm0, %rcx +; SDAG-X64-NEXT: movq %rcx, %rdx +; SDAG-X64-NEXT: sarq $63, %rdx +; SDAG-X64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SDAG-X64-NEXT: cvttss2si %xmm0, %rax +; SDAG-X64-NEXT: andq %rdx, %rax +; SDAG-X64-NEXT: orq %rcx, %rax +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_float_to_ui64: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvttss2si %xmm0, %rcx +; GISEL-X64-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] +; GISEL-X64-NEXT: movaps %xmm0, %xmm2 +; GISEL-X64-NEXT: subss %xmm1, %xmm2 +; GISEL-X64-NEXT: cvttss2si %xmm2, %rdx +; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; GISEL-X64-NEXT: xorq %rdx, %rax +; GISEL-X64-NEXT: xorl %edx, %edx +; GISEL-X64-NEXT: ucomiss %xmm1, %xmm0 +; GISEL-X64-NEXT: setb %dl +; GISEL-X64-NEXT: andl $1, %edx +; GISEL-X64-NEXT: cmovneq %rcx, %rax +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_ui64: +; AVX512: # %bb.0: # %entry 
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i64 + ret i64 %conv +} + +define i32 @test_float_to_ui32(float %x) { +; X64-LABEL: test_float_to_ui32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_ui32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2usi %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i32 + ret i32 %conv +} + +define zeroext i16 @test_float_to_ui16(float %x) { +; X64-LABEL: test_float_to_ui16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_float_to_ui16: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_float_to_ui16: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i16 + ret i16 %conv +} + +define zeroext i8 @test_float_to_ui8(float %x) { +; X64-LABEL: test_float_to_ui8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_float_to_ui8: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_float_to_ui8: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i8 + ret i8 %conv +} + +define i64 @test_double_to_si64(double %x) { +; X64-LABEL: test_double_to_si64: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i64 + ret i64 %conv +} + +define i32 @test_double_to_si32(double %x) { +; X64-LABEL: test_double_to_si32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i32 + ret i32 %conv +} + +define signext i16 @test_double_to_si16(double %x) { +; X64-LABEL: test_double_to_si16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i16 + ret i16 %conv +} + +define signext i8 @test_double_to_si8(double %x) { +; X64-LABEL: test_double_to_si8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si8: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i8 + ret i8 
%conv +} + +define i31 @test_double_to_si31(double %x) { +; X64-LABEL: test_double_to_si31: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si31: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i31 + ret i31 %conv +} + +define i33 @test_double_to_si33(double %x) { +; X64-LABEL: test_double_to_si33: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si33: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i33 + ret i33 %conv +} + +define i64 @test_float_to_si64(float %x) { +; X64-LABEL: test_float_to_si64: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i64 + ret i64 %conv +} + +define i32 @test_float_to_si32(float %x) { +; X64-LABEL: test_float_to_si32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i32 + ret i32 %conv +} + +define signext i16 @test_float_to_si16(float %x) { +; X64-LABEL: test_float_to_si16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i16 + ret i16 %conv +} + +define signext i8 @test_float_to_si8(float %x) { +; X64-LABEL: test_float_to_si8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si8: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i8 + ret i8 %conv +} + +define i31 @test_float_to_si31(float %x) { +; X64-LABEL: test_float_to_si31: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si31: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i31 + ret i31 %conv +} + +define i33 @test_float_to_si33(float %x) { +; X64-LABEL: test_float_to_si33: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si33: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i33 + ret i33 %conv +} diff --git a/llvm/test/CodeGen/X86/isel-int-to-fp.ll b/llvm/test/CodeGen/X86/isel-int-to-fp.ll new file mode 100644 index 0000000000000..fc99ff95788f3 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-int-to-fp.ll @@ -0,0 +1,395 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | 
FileCheck %s --check-prefixes X64,GISEL-X64 +; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512 + +define double @test_ui64_to_double(i64 %x) { +; SDAG-X64-LABEL: test_ui64_to_double: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: movq %rdi, %xmm1 +; SDAG-X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; SDAG-X64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SDAG-X64-NEXT: movapd %xmm1, %xmm0 +; SDAG-X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SDAG-X64-NEXT: addsd %xmm1, %xmm0 +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_ui64_to_double: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: movabsq $4841369599423283200, %rax # imm = 0x4330000000000000 +; GISEL-X64-NEXT: movabsq $4985484787499139072, %rcx # imm = 0x4530000000000000 +; GISEL-X64-NEXT: movsd {{.*#+}} xmm0 = [1.9342813118337666E+25,0.0E+0] +; GISEL-X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF +; GISEL-X64-NEXT: andq %rdi, %rdx +; GISEL-X64-NEXT: orq %rax, %rdx +; GISEL-X64-NEXT: shrq $32, %rdi +; GISEL-X64-NEXT: orq %rdi, %rcx +; GISEL-X64-NEXT: movq %rcx, %xmm1 +; GISEL-X64-NEXT: subsd %xmm0, %xmm1 +; GISEL-X64-NEXT: movq %rdx, %xmm0 +; GISEL-X64-NEXT: addsd %xmm1, %xmm0 +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_ui64_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i64 %x to double + ret double %conv +} + +define double @test_ui32_to_double(i32 %x) { +; X64-LABEL: test_ui32_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2sd %rax, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_ui32_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i32 %x to double + ret double %conv +} + +define double @test_ui16_to_double(i16 zeroext %x) { +; X64-LABEL: test_ui16_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui16_to_double: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui16_to_double: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i16 %x to double + ret double %conv +} + +define double @test_ui8_to_double(i8 zeroext %x) { +; X64-LABEL: test_ui8_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui8_to_double: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui8_to_double: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i8 %x to double + ret double %conv +} + +define float @test_ui64_to_float(i64 %x) { +; SDAG-X64-LABEL: test_ui64_to_float: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: testq %rdi, %rdi +; SDAG-X64-NEXT: js .LBB4_1 +; SDAG-X64-NEXT: # %bb.2: # %entry +; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; SDAG-X64-NEXT: retq +; SDAG-X64-NEXT: .LBB4_1: +; SDAG-X64-NEXT: movq %rdi, %rax +; SDAG-X64-NEXT: shrq %rax +; SDAG-X64-NEXT: andl $1, %edi +; 
SDAG-X64-NEXT: orq %rax, %rdi +; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; SDAG-X64-NEXT: addss %xmm0, %xmm0 +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_ui64_to_float: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; GISEL-X64-NEXT: movq %rdi, %rax +; GISEL-X64-NEXT: shrq %rax +; GISEL-X64-NEXT: movq %rdi, %rcx +; GISEL-X64-NEXT: andq $1, %rcx +; GISEL-X64-NEXT: orq %rax, %rcx +; GISEL-X64-NEXT: cvtsi2ss %rcx, %xmm1 +; GISEL-X64-NEXT: addss %xmm1, %xmm1 +; GISEL-X64-NEXT: xorl %eax, %eax +; GISEL-X64-NEXT: cmpq $0, %rdi +; GISEL-X64-NEXT: setl %al +; GISEL-X64-NEXT: andl $1, %eax +; GISEL-X64-NEXT: movd %xmm1, %eax +; GISEL-X64-NEXT: movd %xmm0, %ecx +; GISEL-X64-NEXT: cmovnel %eax, %ecx +; GISEL-X64-NEXT: movd %ecx, %xmm0 +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_ui64_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i64 %x to float + ret float %conv +} + +define float @test_ui32_to_float(i32 %x) { +; X64-LABEL: test_ui32_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2ss %rax, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_ui32_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i32 %x to float + ret float %conv +} + +define float @test_ui16_to_float(i16 zeroext %x) { +; X64-LABEL: test_ui16_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui16_to_float: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui16_to_float: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i16 %x to float + ret float %conv +} + +define float @test_ui8_to_float(i8 zeroext %x) { +; X64-LABEL: test_ui8_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui8_to_float: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui8_to_float: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i8 %x to float + ret float %conv +} + +define double @test_si64_to_double(i64 %x) { +; X64-LABEL: test_si64_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si64_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i64 %x to double + ret double %conv +} + +define double @test_si32_to_double(i32 %x) { +; X64-LABEL: test_si32_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si32_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i32 %x to double + ret double %conv +} + +define double @test_si16_to_double(i16 signext %x) { +; X64-LABEL: test_si16_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si16_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i16 %x to double + ret double %conv +} + +define 
double @test_si8_to_double(i8 signext %x) { +; X64-LABEL: test_si8_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si8_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i8 %x to double + ret double %conv +} + +define double @test_si31_to_double(i31 %x) { +; X64-LABEL: test_si31_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: addl %edi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si31_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: addl %edi, %edi +; AVX512-NEXT: sarl %edi +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i31 %x to double + ret double %conv +} + +define double @test_si33_to_double(i33 %x) { +; X64-LABEL: test_si33_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: shlq $31, %rdi +; X64-NEXT: sarq $31, %rdi +; X64-NEXT: cvtsi2sd %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si33_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: shlq $31, %rdi +; AVX512-NEXT: sarq $31, %rdi +; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i33 %x to double + ret double %conv +} + +define float @test_si64_to_float(i64 %x) { +; X64-LABEL: test_si64_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si64_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + +define float @test_si32_to_float(i32 %x) { +; X64-LABEL: test_si32_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si32_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i32 %x to float + ret float %conv +} + +define float @test_si16_to_float(i16 signext %x) { +; X64-LABEL: test_si16_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si16_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i16 %x to float + ret float %conv +} + +define float @test_si8_to_float(i8 signext %x) { +; X64-LABEL: test_si8_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si8_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i8 %x to float + ret float %conv +} + +define float @test_si31_to_float(i31 %x) { +; X64-LABEL: test_si31_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: addl %edi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si31_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: addl %edi, %edi +; AVX512-NEXT: sarl %edi +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i31 %x to float + ret float %conv +} + +define float @test_si33_to_float(i33 %x) { +; X64-LABEL: test_si33_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: shlq $31, %rdi +; X64-NEXT: sarq $31, %rdi +; X64-NEXT: cvtsi2ss %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si33_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: shlq $31, %rdi +; AVX512-NEXT: sarq $31, %rdi +; 
AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i33 %x to float + ret float %conv +} diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll index c2a009f06b89d..502249a87c489 100644 --- a/llvm/test/CodeGen/X86/pmulh.ll +++ b/llvm/test/CodeGen/X86/pmulh.ll @@ -937,6 +937,56 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) { ret <16 x i32> %d } +; PR109790 +define <16 x i16> @zext_mulhuw_v16i16_negative_constant(<16 x i16> %a) { +; SSE-LABEL: zext_mulhuw_v16i16_negative_constant: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536] +; SSE-NEXT: pmulhw %xmm2, %xmm0 +; SSE-NEXT: pmulhw %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: zext_mulhuw_v16i16_negative_constant: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] +; AVX-NEXT: retq + %k = and <16 x i16> %a, + %x = zext nneg <16 x i16> %k to <16 x i32> + %m = mul nsw <16 x i32> %x, + %s = lshr <16 x i32> %m, + %t = trunc nuw <16 x i32> %s to <16 x i16> + ret <16 x i16> %t +} + +; PR109790 +define <16 x i16> @zext_mulhuw_v16i16_positive_constant(<16 x i16> %a) { +; SSE-LABEL: zext_mulhuw_v16i16_positive_constant: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1000,1000,1000,1000,1000,1000,1000,1000] +; SSE-NEXT: pmulhw %xmm2, %xmm0 +; SSE-NEXT: pmulhw %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: zext_mulhuw_v16i16_positive_constant: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000] +; AVX-NEXT: retq + %k = and <16 x i16> %a, + %x = zext nneg <16 x i16> %k to <16 x i32> + %m = mul nuw nsw <16 x i32> %x, + %s = lshr <16 x i32> %m, + %t = trunc nuw nsw <16 x i32> %s to <16 x i16> + ret <16 x i16> %t +} + define <16 x i32> @mulhsw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) { ; SSE2-LABEL: mulhsw_v16i16_lshr: ; SSE2: # %bb.0: @@ -2056,3 +2106,4 @@ define <8 x i16> @sse2_pmulhu_w_const(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) + diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll index 9b32005927ace..61814b48e6b3a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll @@ -146,3 +146,18 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64 %res2 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %res1, <64 x i8> , <64 x i8> zeroinitializer, i64 %m) ret <64 x i8> %res2 } + +; PR109272 +define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) { +; CHECK-LABEL: combine_vpermi2var_v64i8_with_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vpmovb2m %zmm1, %k0 +; CHECK-NEXT: vpmovm2b %k0, 
%zmm1 +; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) + %cmp = icmp slt <64 x i8> %a1, zeroinitializer + %sel = select <64 x i1> %cmp, <64 x i8> zeroinitializer, <64 x i8> %perm + ret <64 x i8> %sel +} diff --git a/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll b/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll new file mode 100644 index 0000000000000..2a436dd156dd7 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: not llc --mtriple=xtensa < %s 2>&1 | FileCheck %s + +define void @constraint_f() nounwind { +; CHECK: error: unknown asm constraint 'f' + tail call void asm "addi a1, a1, $0", "f"(i32 1) + ret void +} + +define i32 @register_a100(i32 %a) nounwind { +; CHECK: error: couldn't allocate input reg for constraint '{$a100}' + %1 = tail call i32 asm "addi $0, $1, 1", "=r,{$a100}"(i32 %a) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll b/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll new file mode 100644 index 0000000000000..4b27ba9337f88 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=xtensa < %s | FileCheck %s --check-prefix=XTENSA + +define i32 @m_offset_0(ptr %p) nounwind { +; XTENSA-LABEL: m_offset_0: +; XTENSA: #APP +; XTENSA-NEXT: l32i a2, a2, 0 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %p) + ret i32 %1 +} + +define i32 @m_offset_1020(ptr %p) nounwind { +; XTENSA-LABEL: m_offset_1020: +; XTENSA: #APP +; XTENSA-NEXT: l32i a2, a2, 1020 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 1020 + %2 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i8 @m_i8_offset_7(ptr %p) nounwind { +; XTENSA-LABEL: m_i8_offset_7: +; XTENSA: addi a8, a2, 7 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 7 + %2 = call i8 asm "l8ui $0, $1", "=r,*m"(ptr elementtype(i8) %1) + ret i8 %2 +} + +define i16 @m_i16_offset_10(ptr %p) nounwind { +; XTENSA-LABEL: m_i16_offset_10: +; XTENSA: #APP +; XTENSA-NEXT: l16si a2, a2, 20 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i16, ptr %p, i32 10 + %2 = call i16 asm "l16si $0, $1", "=r,*m"(ptr elementtype(i16) %1) + ret i16 %2 +} diff --git a/llvm/test/CodeGen/Xtensa/inline-asm.ll b/llvm/test/CodeGen/Xtensa/inline-asm.ll new file mode 100644 index 0000000000000..748f5f857acfd --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=xtensa < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +@gi = external global i32 + +define i32 @constraint_r(i32 %a) { +; XTENSA-LABEL: constraint_r: +; XTENSA: l32r a8, .LCPI0_0 +; XTENSA-NEXT: l32i a8, a8, 0 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: add a2, a2, a8 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %1) + ret i32 %2 +} + +define i32 @constraint_i(i32 %a) { +; XTENSA-LABEL: constraint_i: +; 
XTENSA: #APP +; XTENSA-NEXT: addi a2, a2, 113 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "addi $0, $1, $2", "=r,r,i"(i32 %a, i32 113) + ret i32 %2 +} + +define i32 @explicit_register_a3(i32 %a) nounwind { +; XTENSA-LABEL: explicit_register_a3: +; XTENSA: or a3, a2, a2 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a2, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = tail call i32 asm "addi $0, $1, 1", "=r,{a3}"(i32 %a) + ret i32 %1 +} diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll index 24a89af97cffe..edbcdbeb8516c 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll @@ -2,8 +2,8 @@ ; Test alloca instrumentation. Command line includes check-globals so that ; changes to debug-info are detectable. ; -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "riscv64-unknown-linux" diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll index 4d0cce72470b9..451ab9ee184a3 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -2,11 +2,11 @@ ; Test alloca instrumentation. Command line includes check-globals so that ; changes to debug-info are detectable. 
; -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android10000" diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll new file mode 100644 index 0000000000000..5cd23f3ebe2b0 --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 + +; RUN: opt < %s -passes=hwasan -S | FileCheck %s +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -S | FileCheck %s --check-prefixes=GLOBAL +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=FIXED +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -hwasan-mapping-offset-dynamic=global -S | FileCheck %s --check-prefixes=FIXED-GLOBAL +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=GLOBAL-FIXED + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android" + +define i8 @test_load8(ptr %a) sanitize_hwaddress { +; CHECK-LABEL: define i8 @test_load8 +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; CHECK-NEXT: ret i8 [[B]] +; +; GLOBAL-LABEL: define i8 @test_load8 +; GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; GLOBAL-NEXT: [[TMP1:%.*]] = load ptr, ptr @__hwasan_shadow_memory_dynamic_address, align 8 +; GLOBAL-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr [[A]], i32 0) +; GLOBAL-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; GLOBAL-NEXT: ret i8 [[B]] +; +; FIXED-LABEL: define i8 @test_load8 +; FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; FIXED-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr inttoptr (i64 567 to ptr)) +; FIXED-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; FIXED-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; FIXED-NEXT: ret i8 [[B]] +; +; FIXED-GLOBAL-LABEL: define 
i8 @test_load8 +; FIXED-GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; FIXED-GLOBAL-NEXT: [[TMP1:%.*]] = load ptr, ptr @__hwasan_shadow_memory_dynamic_address, align 8 +; FIXED-GLOBAL-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr [[A]], i32 0) +; FIXED-GLOBAL-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; FIXED-GLOBAL-NEXT: ret i8 [[B]] +; +; GLOBAL-FIXED-LABEL: define i8 @test_load8 +; GLOBAL-FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; GLOBAL-FIXED-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr inttoptr (i64 567 to ptr)) +; GLOBAL-FIXED-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; GLOBAL-FIXED-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; GLOBAL-FIXED-NEXT: ret i8 [[B]] +; + %b = load i8, ptr %a, align 4 + ret i8 %b +} diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll index 005a11b00c7a5..73fc077c95624 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -7,9 +7,9 @@ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-TLS-HISTORY ; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=tls -hwasan-record-stack-history=none < %s | \ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-TLS-NOHISTORY -; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=global < %s | \ +; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=global -hwasan-with-frame-record=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-NOTLS -; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=ifunc < %s | \ +; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=IFUNC-NOTLS ; RUN: opt -passes=hwasan -S -mtriple=aarch64-fuchsia < %s | \ ; RUN: FileCheck %s --check-prefixes=FUCHSIA diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s new file mode 100644 index 0000000000000..d78673d933b7a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s @@ -0,0 +1,6199 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] + +v_add3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xff,0x05,0xa4,0x01] + +v_add3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x01,0xfe,0xff,0x01] + +v_add3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x69,0xd2,0xf8,0x01] + +v_add3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7b,0xfa,0xed,0x01] + +v_add3_u32 v5, m0, 0.5, m0 +// GFX11: 
encoding: [0x05,0x00,0x55,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7e,0x82,0xad,0x01] + +v_add3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xfd,0xd4,0x04,0x03] + +v_add3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x00,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: 
[0x05,0x0c,0x00,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] + +v_add_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0xfe,0xfd,0x03,0x00] + +v_add_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x02,0x08,0x00,0x00] + +v_add_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x68,0xd0,0x00,0x00] + +v_add_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x6a,0xf4,0x00,0x00] + +v_add_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x27,0xd7,0x7e,0xfa,0x01,0x20] + +v_add_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x7c,0xe0,0x01,0x00] + +v_add_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0xc1,0x82,0x01,0x00] + +v_add_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0xf0,0xf8,0x00,0x08] + +v_add_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: 
[0x05,0x03,0x27,0xd7,0xfd,0xfc,0x00,0x70] + +v_add_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x27,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x00] + +v_add_lshl_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xff,0x05,0xa4,0x01] + +v_add_lshl_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x01,0xfe,0xff,0x01] + +v_add_lshl_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x69,0xd2,0xf8,0x01] + +v_add_lshl_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add_lshl_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7b,0xfa,0xed,0x01] + +v_add_lshl_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add_lshl_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7e,0x82,0xad,0x01] + +v_add_lshl_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add_lshl_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add_lshl_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add_lshl_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xfd,0xd4,0x04,0x03] + +v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_nc_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xff,0xff,0x03,0x00] + 
+v_add_nc_i32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x01,0x04,0x00,0x00]
+
+v_add_nc_i32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x69,0xd2,0x00,0x00]
+
+v_add_nc_i32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_add_nc_i32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_add_nc_i32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_add_nc_i32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_add_nc_i32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7e,0x82,0x01,0x00]
+
+v_add_nc_i32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_add_nc_i32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_add_nc_i32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_add_nc_i32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_add_nc_i32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp
+// GFX11: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_add_nc_u16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00]
+
+v_add_nc_u16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00]
+
+v_add_nc_u16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00]
+
+v_add_nc_u16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00]
+
+v_add_nc_u16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_add_nc_u16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_add_nc_u16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_add_nc_u16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_add_nc_u16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00]
+
+v_add_nc_u16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
+// GFX11: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX11: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_alignbit_b32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00]
+
+v_alignbit_b32 v5, v255, s2, s3
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xff,0x05,0x0c,0x00]
+
+v_alignbit_b32 v5, s1, v255, s3
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0xfe,0x0f,0x00]
+
+v_alignbit_b32 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_alignbit_b32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_alignbit_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_alignbit_b32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_alignbit_b32 v5, m0, 0.5, exec_lo
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7d,0xe0,0xf9,0x01]
+
+v_alignbit_b32 v5, exec_lo, -1, m0
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7e,0x82,0xf5,0x01]
+
+v_alignbit_b32 v5, exec_hi, null, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7f,0xf8,0xac,0x01]
+
+v_alignbit_b32 v5, null, exec_lo, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7c,0xfc,0xa8,0x01]
+
+v_alignbit_b32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_alignbit_b32 v5, 0.5, m0, exec_hi
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xf0,0xfa,0xfc,0x01]
+
+v_alignbit_b32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_alignbit_b32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x16,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_alignbyte_b32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x00]
+
+v_alignbyte_b32 v5, v255, s2, s3
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xff,0x05,0x0c,0x00]
+
+v_alignbyte_b32 v5, s1, v255, s3
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x01,0xfe,0x0f,0x00]
+
+v_alignbyte_b32 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_alignbyte_b32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_alignbyte_b32 v5, m0, 0.5, exec_lo
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7d,0xe0,0xf9,0x01]
+
+v_alignbyte_b32 v5, exec_lo, -1, m0
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7e,0x82,0xf5,0x01]
+
+v_alignbyte_b32 v5, exec_hi, null, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7f,0xf8,0xac,0x01]
+
+v_alignbyte_b32 v5, null, exec_lo, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7c,0xfc,0xa8,0x01]
+
+v_alignbyte_b32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_alignbyte_b32 v5, 0.5, m0, exec_hi
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xf0,0xfa,0xfc,0x01]
+
+v_alignbyte_b32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_and_b16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+
+v_and_b16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+
+v_and_b16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+
+v_and_b16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+
+v_and_b16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_and_b16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_and_b16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_and_b16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_and_b16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+
+v_and_b16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_and_b16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_and_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] + +v_and_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00] + +v_and_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] + +v_and_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_and_or_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] + +v_and_or_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xff,0x05,0xa4,0x01] + +v_and_or_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x01,0xfe,0xff,0x01] + +v_and_or_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x69,0xd2,0xf8,0x01] + +v_and_or_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x6a,0xf6,0x0c,0x04] + +v_and_or_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7b,0xfa,0xed,0x01] + +v_and_or_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7d,0xe0,0xf5,0x01] + +v_and_or_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7e,0x82,0xad,0x01] + +v_and_or_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7f,0xf8,0xa8,0x01] + +v_and_or_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xc1,0xfe,0xf4,0x03] + +v_and_or_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xf0,0xfa,0xc0,0x03] + +v_and_or_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xfd,0xd4,0x04,0x03] + +v_and_or_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x57,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_ashrrev_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00] + +v_ashrrev_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xff,0xff,0x03,0x00] + +v_ashrrev_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x04,0x00,0x00] + +v_ashrrev_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x69,0xd2,0x00,0x00] + +v_ashrrev_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x6a,0xf6,0x00,0x00] + +v_ashrrev_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7b,0xfa,0x01,0x00] + +v_ashrrev_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7d,0xe0,0x01,0x00] + +v_ashrrev_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7e,0x82,0x01,0x00] + +v_ashrrev_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7f,0xf8,0x00,0x00] + +v_ashrrev_i16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7c,0xfc,0x00,0x00] + +v_ashrrev_i16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xc1,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xf0,0xfa,0x00,0x00] + +v_ashrrev_i16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xfd,0xd4,0x00,0x00] + +v_ashrrev_i16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i64 v[5:6], v1, vcc +// GFX11: encoding: 
[0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] + +v_ashrrev_i64 v[5:6], v255, exec +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xff,0xfd,0x00,0x00] + +v_ashrrev_i64 v[5:6], exec_lo, v[2:3] +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x7e,0x04,0x02,0x00] + +v_ashrrev_i64 v[5:6], exec_hi, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x7f,0xfc,0x03,0x00] + +v_ashrrev_i64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x7c,0xf8,0x00,0x00] + +v_ashrrev_i64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xc1,0x82,0x01,0x00] + +v_ashrrev_i64 v[5:6], 0.5, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i64 v[5:6], src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xfd,0xfa,0x01,0x00] + +v_ashrrev_i64 v[254:255], 0xaf123456, 0.5 +// GFX11: encoding: [0xfe,0x00,0x3e,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00] + +v_bcnt_u32_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xff,0xff,0x03,0x00] + +v_bcnt_u32_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x04,0x00,0x00] + +v_bcnt_u32_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x69,0xd2,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x6a,0xf6,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7b,0xfa,0x01,0x00] + +v_bcnt_u32_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7d,0xe0,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7e,0x82,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7f,0xf8,0x00,0x00] + +v_bcnt_u32_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7c,0xfc,0x00,0x00] + +v_bcnt_u32_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xc1,0xfe,0x00,0x00] + +v_bcnt_u32_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xf0,0xfa,0x00,0x00] + +v_bcnt_u32_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xfd,0xd4,0x00,0x00] + +v_bcnt_u32_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_i32 
v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x11,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x10,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x00] + +v_bfi_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xff,0x05,0xa4,0x01] + +v_bfi_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x01,0xfe,0xff,0x01] + +v_bfi_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfi_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfi_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfi_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfi_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7e,0x82,0xad,0x01] + +v_bfi_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfi_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfi_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfi_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfi_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x12,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, v1, v2 +// GFX11: 
encoding: [0x05,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00] + +v_bfm_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xff,0xff,0x03,0x00] + +v_bfm_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x04,0x00,0x00] + +v_bfm_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x69,0xd2,0x00,0x00] + +v_bfm_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x6a,0xf6,0x00,0x00] + +v_bfm_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7b,0xfa,0x01,0x00] + +v_bfm_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7d,0xe0,0x01,0x00] + +v_bfm_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7e,0x82,0x01,0x00] + +v_bfm_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7f,0xf8,0x00,0x00] + +v_bfm_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7c,0xfc,0x00,0x00] + +v_bfm_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xc1,0xfe,0x00,0x00] + +v_bfm_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xf0,0xfa,0x00,0x00] + +v_bfm_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xfd,0xd4,0x00,0x00] + +v_bfm_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cndmask_b16 v5, v1, src_scc, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, v255, 0.5, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, s105, s105, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, vcc_hi, v2, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, m0, v255, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_hi, exec_hi, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, null, m0, s105 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo +// W32: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, 0.5, -1, vcc_hi +// W32: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -|src_scc|, null, ttmp15 +// W32: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, v1, src_scc, 
s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, v255, 0.5, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, s105, s105, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, vcc_hi, v2, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, m0, v255, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, null, m0, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] +// W64: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, 0.5, -1, vcc +// W64: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] +// W64: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null +// GFX11: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_cubeid_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] + +v_cubeid_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0xa4,0x01] + +v_cubeid_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfe,0xff,0x01] + +v_cubeid_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubeid_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubeid_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubeid_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x0c,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubeid_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubeid_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x0c,0xd6,0x7e,0x82,0xad,0x01] + +v_cubeid_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x0c,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubeid_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x0c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubeid_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x0c,0xd6,0xc1,0xfe,0xf4,0xc3] 
+ +v_cubeid_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubeid_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x0c,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubeid_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x0c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cubema_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x00] + +v_cubema_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0xff,0x05,0xa4,0x01] + +v_cubema_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x01,0xfe,0xff,0x01] + +v_cubema_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubema_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubema_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubema_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x0f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubema_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubema_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x0f,0xd6,0x7e,0x82,0xad,0x01] + +v_cubema_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x0f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubema_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x0f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubema_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x0f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubema_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubema_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x0f,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubema_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x0f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cubesc_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x00] + +v_cubesc_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xff,0x05,0xa4,0x01] + +v_cubesc_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x01,0xfe,0xff,0x01] + +v_cubesc_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubesc_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubesc_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubesc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x0d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubesc_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubesc_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x0d,0xd6,0x7e,0x82,0xad,0x01] + +v_cubesc_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x0d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubesc_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x0d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubesc_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x0d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubesc_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubesc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x0d,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubesc_f32 v255, 
-|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x0d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cubetc_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x00] + +v_cubetc_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0xff,0x05,0xa4,0x01] + +v_cubetc_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x01,0xfe,0xff,0x01] + +v_cubetc_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubetc_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubetc_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubetc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x0e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubetc_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubetc_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x0e,0xd6,0x7e,0x82,0xad,0x01] + +v_cubetc_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x0e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubetc_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x0e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubetc_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x0e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubetc_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x0e,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_i16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_i16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_i16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_i16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_i16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_i16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_i16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_i16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_i16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_i16_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_i16_i32 v5, s1, s2 
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_i16_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_i16_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_i16_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_i16_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_i16_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_i16_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_i16_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] + +v_cvt_pk_i16_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] + +v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_i16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: 
[0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_u16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_u16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_u16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_u16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_u16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_u16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_u16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_u16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_u16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_u16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u16_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_u16_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_u16_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_u16_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u16_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_u16_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_u16_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] + 
+v_cvt_pk_u16_u32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cvt_pk_u8_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cvt_pk_u8_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cvt_pk_u8_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_cvt_pk_u8_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null
+// GFX11: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_i16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_i16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_i16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_i16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_i16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_u16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f16 v255, 
-|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_u16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_u16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_u16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_u16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_div_fixup_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] + +v_div_fixup_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] + +v_div_fixup_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] + +v_div_fixup_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] + +v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] + +v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] + +v_div_fixup_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] + +v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] + +v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] + +v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] + +v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_div_fixup_f16 v5, 0.5, -m0, 0.5 
op_sel:[0,1,0,0] mul:2 +// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_div_fixup_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] + +v_div_fixup_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] + +v_div_fixup_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] + +v_div_fixup_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] + +v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] + +v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] + +v_div_fixup_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] + +v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] + +v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] + +v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] + +v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] + +v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] + +v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| +// GFX11: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] + +v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] + +v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null +// GFX11: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] + +v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| +// GFX11: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] + +v_div_fixup_f64 v[5:6], null, 0.5, vcc +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] + +v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 +// GFX11: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] + +v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 +// GFX11: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] + +v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] + +v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01] + +v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15 +// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01] + +v_div_fmas_f32 v5, -|m0|, -|v255|, v3 +// W32: encoding: 
[0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64]
+
+v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo|
+// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1]
+
+v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255|
+// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7]
+
+v_div_fmas_f32 v5, null, exec_hi, -|exec_hi|
+// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81]
+
+v_div_fmas_f32 v5, -1, -|m0|, -|m0|
+// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1]
+
+v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2
+// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b]
+
+v_div_fmas_f32 v5, vcc_lo, v2, v3
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04]
+
+v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01]
+
+v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15
+// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61]
+
+v_div_fmas_f32 v5, m0, 0.5, v255
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07]
+
+v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo|
+// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1]
+
+v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi|
+// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1]
+
+v_div_fmas_f32 v5, null, m0, -|m0|
+// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81]
+
+v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo|
+// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1]
+
+v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2
+// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b]
+
+v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f32 v5, v255, src_scc, src_scc
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03]
+
+v_div_fmas_f32 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_div_fmas_f32 v5, src_scc, -1, -1 mul:4
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13]
+
+v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2
+// GFX11: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4]
+// GFX11: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04]
+
+v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105]
+// GFX11: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01]
+
+v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]|
+// GFX11: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7]
+
+v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX11: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1]
+
+v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null
+// GFX11: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61]
+
+v_div_fmas_f64 v[5:6], null, 0.5, -src_scc
+// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83]
+
+v_div_fmas_f64 v[5:6], -1, -exec, |exec|
+// GFX11: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41]
+
+v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2
+// GFX11: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9]
+
+v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4
+// GFX11: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33]
+
+v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2
+// GFX11: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf]
+
+v_div_scale_f32 v5, vcc_lo, v1, v2, s3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, v255, s2, s105
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456)
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v1, v2, s3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v255, s2, s105
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s1, v255, exec_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s105, s105, exec_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, m0, 0.5, m0
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456)
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2
+// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
+// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_bf16_bf16 v5, v255, v255, s105
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
+
+v_dot2_bf16_bf16 v5, s1, s2, v3
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
+
+v_dot2_bf16_bf16 v5, s105, s105, m0
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
+
+v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+
+v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
+// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
+
+v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
+// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
+// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
+
+v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
+// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
+
+v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
+
+v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_f16_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
+
+v_dot2_f16_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
+
+v_dot2_f16_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_dot2_f16_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
+
+v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
+v_fma_dx9_zero_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_dx9_zero_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_dx9_zero_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_dx9_zero_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_dx9_zero_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2
+// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+v_fma_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4]
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04]
+
+v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7]
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00]
+
+v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255]
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07]
+
+v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]|
+// GFX11: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1]
+
+v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX11: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1]
+
+v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null
+// GFX11: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec|
+// GFX11: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1]
+
+v_fma_f64 v[5:6], null, 0.5, vcc
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01]
+
+v_fma_f64 v[5:6], -1, -1, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2
+// GFX11: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b]
+
+v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4
+// GFX11: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73]
+
+v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2
+// GFX11: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+
+v_fma_legacy_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_legacy_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_legacy_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_legacy_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_legacy_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_legacy_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_legacy_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_legacy_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_legacy_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_legacy_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_legacy_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_legacy_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_legacy_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00]
+
+v_ldexp_f32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_ldexp_f32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_ldexp_f32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00]
+
+v_ldexp_f32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_ldexp_f32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_ldexp_f32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_ldexp_f32 v5, 0.5, m0 mul:2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08]
+
+v_ldexp_f32 v5, src_scc, vcc_lo mul:4
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10]
+
+v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
+// GFX11: encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f64 v[5:6], v[1:2], v2
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], v255
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s2
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s105
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[254:255], ttmp15
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[2:3], vcc_hi
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[104:105], vcc_lo
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00]
+
+v_ldexp_f64 v[5:6], vcc, m0
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00]
+
+v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00]
+
+v_ldexp_f64 v[5:6], exec, exec_lo
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00]
+
+v_ldexp_f64 v[5:6], null, null
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_ldexp_f64 v[5:6], -1, -1
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00]
+
+v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08]
+
+v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4
+// GFX11: encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30]
+
+v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2
+// GFX11: encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lerp_u8 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lerp_u8 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lerp_u8 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lerp_u8 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lerp_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lerp_u8 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lerp_u8 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lerp_u8 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lerp_u8 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lerp_u8 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lerp_u8 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lerp_u8 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_add_u32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_add_u32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_add_u32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_add_u32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_add_u32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_add_u32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_add_u32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_add_u32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_add_u32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_add_u32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_or_b32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_or_b32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_or_b32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_or_b32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_or_b32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_or_b32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_or_b32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_or_b32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_or_b32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_or_b32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshlrev_b16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshlrev_b16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshlrev_b16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshlrev_b16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshlrev_b16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshlrev_b16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshlrev_b16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshlrev_b16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshlrev_b16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshlrev_b64 v[5:6], v1, vcc
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00]
+
+v_lshlrev_b64 v[5:6], v255, exec
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00]
+
+v_lshlrev_b64 v[5:6], exec_lo, v[2:3]
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00]
+
+v_lshlrev_b64 v[5:6], exec_hi, v[254:255]
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00]
+
+v_lshlrev_b64 v[5:6], null, null
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_lshlrev_b64 v[5:6], -1, -1
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00]
+
+v_lshlrev_b64 v[5:6], 0.5, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_lshlrev_b64 v[5:6], src_scc, src_scc
+// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00]
+
+v_lshlrev_b64 v[254:255], 0xaf123456, 0.5
+// GFX11: encoding: [0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_lshrrev_b16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshrrev_b16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshrrev_b16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshrrev_b16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshrrev_b16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshrrev_b16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshrrev_b16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshrrev_b16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshrrev_b16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshrrev_b16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshrrev_b16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshrrev_b16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshrrev_b16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], v1, vcc
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], v255, exec
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], exec_lo, v[2:3]
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00]
+
+v_lshrrev_b64 v[5:6], exec_hi, v[254:255]
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00]
+
+v_lshrrev_b64 v[5:6], null, null
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], -1, -1
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00]
+
+v_lshrrev_b64 v[5:6], 0.5, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_lshrrev_b64 v[5:6], src_scc, src_scc
+// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00]
+
+v_lshrrev_b64 v[254:255], 0xaf123456, 0.5
+// GFX11: encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mad_i16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_i16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i16 v5, v1, v2, v3
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v255, v255, s3
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_i32_i16 v5, s1, s2, v255
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07]
+
+v_mad_i32_i16 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01]
+
+v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i32_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i32_i16 v5, exec_lo, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01]
+
+v_mad_i32_i16 v5, exec_hi, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01]
+
+v_mad_i32_i16 v5, null, exec_lo, null
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01]
+
+v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i16 v5, 0.5, m0, -1 op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xf0,0xfa,0x04,0x03]
+
+v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
+// GFX11: encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i24 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_i32_i24 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i32_i24 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i32_i24 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i32_i24 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i32_i24 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i32_i24 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_i32_i24 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_i32_i24 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i24 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_i32_i24 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_i32_i24 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp
+// GFX11: encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mad_i64_i32 v[5:6], s6, s105, s105, s[6:7]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x69,0xd2,0x18,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, ttmp15, ttmp15, s[104:105]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7b,0xf6,0xa0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, m0, 0.5, ttmp[14:15]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7d,0xe0,0xe9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, exec_lo, -1, exec
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7e,0x82,0xf9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, exec_hi, null, vcc
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7f,0xf8,0xa8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s105, null, exec_lo, null
+// W32: encoding: [0x05,0x69,0xff,0xd6,0x7c,0xfc,0xf0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], vcc_lo, -1, exec_hi, -1
+// W32: encoding: [0x05,0x6a,0xff,0xd6,0xc1,0xfe,0x04,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456
+// W32: encoding: [0x05,0x6b,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc
+// W32: encoding: [0x05,0x7b,0xff,0xd6,0xfd,0xd4,0xf4,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xf6,0xa0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], null, exec_lo, null
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1
+// W64: encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc
+// W64: encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp
+// GFX11: encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_u16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_u16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_u16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_u16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u16 v5, v1, v2, v3
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v255, v255, s3
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_u32_u16 v5, s1, s2, v255
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07]
+
+v_mad_u32_u16 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01]
+
+v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u32_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u32_u16 v5, exec_lo, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01]
+
+v_mad_u32_u16 v5, exec_hi, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01]
+
+v_mad_u32_u16 v5, null, exec_lo, null
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01]
+
+v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u16 v5, 0.5, m0, -1 op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xf0,0xfa,0x04,0x03]
+
+v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
+// GFX11: encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u24 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_u32_u24 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_u32_u24 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_u32_u24 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_u32_u24 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u32_u24 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u32_u24 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_u32_u24 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_u32_u24 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u24 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_u32_u24 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_u32_u24 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp
+// GFX11: encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mad_u64_u32 v[5:6], s6, s105, s105, s[6:7]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x69,0xd2,0x18,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, ttmp15, ttmp15, s[104:105]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7b,0xf6,0xa0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, m0, 0.5, ttmp[14:15]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7d,0xe0,0xe9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, exec_lo, -1, exec
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7e,0x82,0xf9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, exec_hi, null, vcc
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7f,0xf8,0xa8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s105, null, exec_lo, null
+// W32: encoding: [0x05,0x69,0xfe,0xd6,0x7c,0xfc,0xf0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], vcc_lo, -1, exec_hi, -1
+// W32: encoding: [0x05,0x6a,0xfe,0xd6,0xc1,0xfe,0x04,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456
+// W32: encoding: [0x05,0x6b,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc
+// W32: encoding: [0x05,0x7b,0xfe,0xd6,0xfd,0xd4,0xf4,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xf6,0xa0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], null, exec_lo, null
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1
+// W64: encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc
+// W64: encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp
+// GFX11: encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_max3_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_max3_f16 v5, v255, s2, s105 mul:2
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09]
+
+v_max3_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_max3_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_max3_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_max3_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_max3_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_max3_i16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_i16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_i16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_i16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_i16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_i16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_max3_i32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_i32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_i32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_i32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_i32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_i32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_i32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_i32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_i32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_i32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_i32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_i32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_i32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_i32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_max3_u16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_u16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_u16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_u16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_u16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_u16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_max3_u32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_u32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_u32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_u32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_u32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_u32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_u32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_u32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_u32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_u32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_u32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_u32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_u32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_u32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_max_f64 v[5:6], v[1:2], v[2:3]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_f64 v[5:6], v[254:255], v[254:255]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00]
+
+v_max_f64 v[5:6], s[2:3], s[4:5]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], s[104:105], s[104:105]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00]
+
+v_max_f64 v[5:6], vcc, ttmp[14:15]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00]
+
+v_max_f64 v[5:6], ttmp[14:15], 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_max_f64 v[5:6], -|exec|, src_scc
+// GFX11: encoding: [0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20]
+
+v_max_f64 v[5:6], null, 0.5
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00]
+
+v_max_f64 v[5:6], -1, -1
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00]
+
+v_max_f64 v[5:6], 0.5, null mul:2
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08]
+
+v_max_f64 v[5:6], -|src_scc|, -|exec| mul:4
+// GFX11: encoding: [0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70]
+
+v_max_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2
+// GFX11: encoding: [0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf]
+
+v_max_i16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_i16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00]
+
+v_max_i16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00]
+
+v_max_i16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00]
+
+v_max_i16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_max_i16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_i16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_max_i16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_max_i16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00]
+
+v_max_i16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_max_i16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_max_i16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_max_i16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_max_i16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_max_i16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_u16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_u16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00]
+
+v_max_u16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00]
+
+v_max_u16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00]
+
+v_max_u16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_max_u16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_u16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_max_u16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_max_u16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00]
+
+v_max_u16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_max_u16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_max_u16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_max_u16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_max_u16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_max_u16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_maxmin_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_maxmin_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX11: encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_maxmin_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_maxmin_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_maxmin_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_maxmin_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_maxmin_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_maxmin_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_i32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_i32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_i32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_i32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_maxmin_i32 v5, m0, 0.5, m0
+// GFX11:
encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maxmin_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01] + +v_maxmin_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01] + +v_maxmin_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_maxmin_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03] + +v_maxmin_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03] + +v_maxmin_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03] + +v_maxmin_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_maxmin_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00] + +v_maxmin_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01] + +v_maxmin_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01] + +v_maxmin_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01] + +v_maxmin_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maxmin_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01] + +v_maxmin_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maxmin_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01] + +v_maxmin_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01] + +v_maxmin_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_maxmin_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03] + +v_maxmin_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03] + +v_maxmin_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03] + +v_maxmin_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mbcnt_hi_u32_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] + +v_mbcnt_hi_u32_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00] + +v_mbcnt_hi_u32_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00] + +v_mbcnt_hi_u32_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00] + +v_mbcnt_hi_u32_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00] + +v_mbcnt_hi_u32_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00] + 
+v_mbcnt_hi_u32_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00] + +v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_lo_u32_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] + +v_mbcnt_lo_u32_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00] + +v_mbcnt_lo_u32_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] + +v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_med3_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_med3_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] + 
+v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 +// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] + +v_med3_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_med3_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_med3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_med3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33] + +v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_med3_i16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 
+ +v_med3_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_med3_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_med3_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_med3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u32 v5, s105, s105, exec_lo +// 
GFX11: encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_med3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min3_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] + +v_min3_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] + +v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] + +v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 +// GFX11: encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] + +v_min3_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_f32 v5, 
vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1] + +v_min3_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_min3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_min3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_min3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33] + +v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_min3_i16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_min3_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_i32 v5, m0, 0.5, m0 +// GFX11: encoding: 
[0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_min3_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min3_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_min3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_min3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] + +v_min_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00] + +v_min_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00] + +v_min_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00] + +v_min_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00] + +v_min_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20] + +v_min_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00] + +v_min_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00] + +v_min_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08] + +v_min_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70] + +v_min_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] + +v_min_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] + +v_min_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] + +v_min_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] + +v_min_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] + +v_min_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] + +v_min_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] + +v_min_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7d,0xe0,0x01,0x00] + +v_min_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] + +v_min_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] + +v_min_i16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] + +v_min_i16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] + +v_min_i16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xf0,0xfa,0x00,0x00] + +v_min_i16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] + +v_min_i16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_min_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] + +v_min_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] + +v_min_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] + +v_min_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] + +v_min_u16 
v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] + +v_min_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] + +v_min_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7d,0xe0,0x01,0x00] + +v_min_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] + +v_min_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] + +v_min_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] + +v_min_u16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] + +v_min_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xf0,0xfa,0x00,0x00] + +v_min_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] + +v_min_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_minmax_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minmax_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minmax_f16 v5, null, exec_lo, -|0xfe0b| +// GFX11: encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] + +v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +v_minmax_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minmax_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_f32 
v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minmax_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minmax_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minmax_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minmax_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33] + +v_minmax_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] + +v_minmax_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] + +v_minmax_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] + +v_minmax_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] + +v_minmax_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] + +v_minmax_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] + +v_minmax_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] + +v_minmax_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] + +v_minmax_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] + +v_minmax_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] + +v_minmax_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] + +v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] + +v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] + +v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] + +v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] + +v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] + +v_mqsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] + +v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] + +v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], null, null, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] + 
+v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] + +v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] + +v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] + +v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp +// GFX11: encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] + +v_msad_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] + +v_msad_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] + +v_msad_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] + +v_msad_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] + +v_msad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] + +v_msad_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] + +v_msad_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] + +v_msad_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] + +v_msad_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] + +v_msad_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] + +v_msad_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] + +v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mul_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] + +v_mul_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00] + +v_mul_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00] + +v_mul_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00] + +v_mul_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00] + +v_mul_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20] + +v_mul_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00] + +v_mul_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00] + +v_mul_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08] + +v_mul_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70] + +v_mul_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] + +v_mul_hi_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] + +v_mul_hi_i32 v5, s1, s2 +// GFX11: encoding: 
[0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] + +v_mul_hi_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_hi_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_hi_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_hi_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_hi_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_hi_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_hi_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_hi_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_hi_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_hi_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_hi_i32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] + +v_mul_hi_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] + +v_mul_hi_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] + +v_mul_hi_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_hi_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_hi_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_hi_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_hi_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_hi_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_hi_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_hi_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_hi_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_hi_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_hi_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_lo_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] + +v_mul_lo_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] + +v_mul_lo_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_lo_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_lo_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_lo_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_lo_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_lo_u16 v5, exec_lo, -1 +// GFX11: encoding: 
[0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_lo_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_lo_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_lo_u16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_lo_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_lo_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_lo_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_lo_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] + +v_mul_lo_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] + +v_mul_lo_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_lo_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_lo_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_lo_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_lo_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_lo_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_lo_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_lo_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_lo_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_lo_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_lo_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_lo_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] + +v_mullit_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] + +v_mullit_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] + +v_mullit_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] + +v_mullit_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] + +v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] + +v_mullit_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mullit_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] + +v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_mullit_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_mullit_f32 v5, -src_scc, |vcc_lo|, 
-1 mul:4 +// GFX11: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] + +v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] + +v_or3_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] + +v_or3_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] + +v_or3_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] + +v_or3_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] + +v_or3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] + +v_or3_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] + +v_or3_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] + +v_or3_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] + +v_or3_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] + +v_or3_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] + +v_or3_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] + +v_or3_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_or_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] + +v_or_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] + +v_or_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] + +v_or_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] + +v_or_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] + +v_or_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_or_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] + +v_or_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00] + +v_or_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] + +v_or_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] + +v_or_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] + +v_or_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] + +v_or_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00] + +v_or_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] + +v_or_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] + +v_pack_b32_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] + +v_pack_b32_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] + +v_pack_b32_f16 v5, s105, s105 +// GFX11: encoding: 
[0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] + +v_pack_b32_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] + +v_pack_b32_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] + +v_pack_b32_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] + +v_pack_b32_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] + +v_pack_b32_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] + +v_pack_b32_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] + +v_pack_b32_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] + +v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] + +v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_perm_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] + +v_perm_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] + +v_perm_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] + +v_perm_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] + +v_perm_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] + +v_perm_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_perm_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] + +v_perm_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] + +v_perm_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] + +v_perm_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] + +v_perm_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_perm_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] + +v_perm_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] + +v_perm_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] + +v_perm_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane16_b32 v5, v1, exec_lo, 
src_scc +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] + +v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] + +v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] + +v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] + +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + +v_permlanex16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] + +v_permlanex16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlanex16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] + +v_permlanex16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlanex16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlanex16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] + +v_permlanex16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] + +v_permlanex16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] + +v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] + +v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] + +v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] + +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] + +v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] + +v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] + +v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] + 
+v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] + +v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] + +v_qsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] + +v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] + +v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] + +v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] + +v_readlane_b32 s5, v1, s2 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] + +v_readlane_b32 s5, v1, s105 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] + +v_readlane_b32 s105, v1, ttmp15 +// GFX11: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] + +v_readlane_b32 vcc_lo, v1, vcc_hi +// GFX11: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] + +v_readlane_b32 vcc_hi, v1, vcc_lo +// GFX11: encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] + +v_readlane_b32 ttmp15, v1, m0 +// GFX11: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] + +v_readlane_b32 null, v255, null +// GFX11: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] + +v_sad_hi_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_hi_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_hi_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_hi_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_hi_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_hi_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_hi_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_hi_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_hi_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_hi_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_hi_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_hi_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_hi_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_hi_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_hi_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x23,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sad_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: 
[0x05,0x00,0x24,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_sad_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u16 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u16 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u16 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u16 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u16 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u16 v255, 0xfe0b, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_sad_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sub_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x01,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x01,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x01,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x01,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x01,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], s1, s2 
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x01,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x01,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x01,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: 
[0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_subrev_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, v255, v255 +// W32: 
encoding: [0x05,0x06,0x02,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x02,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x02,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x02,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x02,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x02,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x02,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x02,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_trig_preop_f64 v[5:6], v[1:2], v2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], v255 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xff,0x03,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], s2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x00,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], s105 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xd3,0x00,0x00] + +v_trig_preop_f64 v[5:6], v[254:255], ttmp15 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0xfe,0xf7,0x00,0x00] + +v_trig_preop_f64 v[5:6], s[2:3], vcc_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x02,0xd6,0x00,0x00] + +v_trig_preop_f64 v[5:6], s[104:105], vcc_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x68,0xd4,0x00,0x00] + +v_trig_preop_f64 v[5:6], vcc, m0 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x6a,0xfa,0x00,0x00] + +v_trig_preop_f64 v[5:6], ttmp[14:15], exec_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x7a,0xfe,0x00,0x00] + +v_trig_preop_f64 v[5:6], exec, exec_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x7e,0xfc,0x00,0x00] + +v_trig_preop_f64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x7c,0xf8,0x00,0x00] + +v_trig_preop_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0xc1,0x82,0x01,0x00] + +v_trig_preop_f64 v[5:6], 0.5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0xf0,0xe0,0x01,0x08] + +v_trig_preop_f64 v[5:6], -|src_scc|, src_scc mul:4 +// GFX11: encoding: [0x05,0x01,0x2f,0xd7,0xfd,0xfa,0x01,0x30] + +v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] + +v_writelane_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] + +v_writelane_b32 v5, s105, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x69,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_lo, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x6a,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_hi, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x6b,0x04,0x00,0x00] + +v_writelane_b32 v5, ttmp15, s2 +// GFX11: 
encoding: [0x05,0x00,0x61,0xd7,0x7b,0x04,0x00,0x00] + +v_writelane_b32 v5, m0, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7d,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_lo, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7e,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_hi, s105 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7f,0xd2,0x00,0x00] + +v_writelane_b32 v5, null, ttmp15 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7c,0xf6,0x00,0x00] + +v_writelane_b32 v5, -1, null +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0xc1,0xf8,0x00,0x00] + +v_writelane_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0xf0,0xfa,0x00,0x00] + +v_writelane_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0xfd,0xd4,0x00,0x00] + +v_writelane_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x61,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00] + +v_xad_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xff,0x05,0xa4,0x01] + +v_xad_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x01,0xfe,0xff,0x01] + +v_xad_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x69,0xd2,0xf8,0x01] + +v_xad_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7b,0xfa,0xed,0x01] + +v_xad_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xad_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7e,0x82,0xad,0x01] + +v_xad_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xad_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xad_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xf0,0xfa,0xc0,0x03] + +v_xad_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xfd,0xd4,0x04,0x03] + +v_xad_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] + +v_xor3_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xff,0x05,0xa4,0x01] + +v_xor3_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x01,0xfe,0xff,0x01] + +v_xor3_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x69,0xd2,0xf8,0x01] + +v_xor3_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xor3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7b,0xfa,0xed,0x01] + +v_xor3_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xor3_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7e,0x82,0xad,0x01] + +v_xor3_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xor3_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x40,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_xor3_b32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_xor3_b32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_xor3_b32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_xor3_b32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_xor_b16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+
+v_xor_b16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+
+v_xor_b16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+
+v_xor_b16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+
+v_xor_b16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_xor_b16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_xor_b16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_xor_b16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_xor_b16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+
+v_xor_b16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_xor_b16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_xor_b16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_xor_b16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_xor_b16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_xor_b16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index e025ab73933eb..dadb515630b66 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s
new file mode 100644
index 0000000000000..2371dbc8c1b8f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s
@@ -0,0 +1,4695 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32:
[0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 +// W32: [0x05,0x69,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 +// W64: 
[0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror +// GFX11: 
[0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3]
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0]
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3]
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x05,0x30]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pknorm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pknorm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pknorm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pknorm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pknorm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pknorm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13]
+
+v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0
bank_mask:0x1 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + 
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: 
[0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: 
[0x05,0x00,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x4c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x1c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_max3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x1c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_max3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x1c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x1c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x1c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x1c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + 
+v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// 
GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: 
[0x05,0x01,0x5e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x5e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x5e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x5e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maxmin_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x5e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maxmin_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x5e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maxmin_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x5e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + 
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x4f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x87,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x1f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_med3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x1f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_med3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x1f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x1f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x1f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x1f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x49,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x19,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x19,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x19,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x19,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x19,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x19,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x5f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x5f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x5f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x5f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x5f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minmax_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x5f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minmax_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x5f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo
op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: 
[0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 58fec38cf57fb..ceb8cac21f5aa 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s
new file mode 100644
index 0000000000000..cf2a7ab7ef76c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s
@@ -0,0 +1,2968 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x55,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x55,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x06,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR:
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x00,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x17,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x17,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x62,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x62,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x57,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x57,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x3a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x3a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + 
+v_bcnt_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x11,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x11,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x10,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x10,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + 
+v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x12,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x12,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x05,0x0f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, 
-|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x06,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x06,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x06,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x24,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x07,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x07,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x07,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x23,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x23,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x26,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x26,0xd6,0xe9,0xfe,0xf7,0x23,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x21,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x21,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x21,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + 
+v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x22,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x22,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x13,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x13,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x13,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x13,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x13,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x13,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x13,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1c,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + +v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x1c,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x15,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x15,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x46,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x46,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x56,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x56,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x56,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x38,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x39,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 
vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x5a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x59,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x59,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x4c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x4c,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x4c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_max3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x1c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x1c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x1c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x03,0x1c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x1c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x1c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x1c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1d,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + 
+v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1e,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x00,0x0a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x09,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x60,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x5e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x5e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x5e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x03,0x5e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x5e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x5e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maxmin_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x5e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x64,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x64,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x62,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x62,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x62,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x20,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x20,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1f,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1f,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_med3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x4f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x4f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x4f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_med3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x1f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x1f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x1f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x1f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x1f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x1f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x1f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x1f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 
+ +v_med3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x20,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x20,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x21,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x21,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x21,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x49,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x49,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x49,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x19,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x19,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x19,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x19,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x19,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x06,0x19,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x19,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, 
v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0c,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u16_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0b,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x61,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x5f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x5f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x5f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x5f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x5f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x5f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minmax_f32_e64_dpp 
v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x5f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x65,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x65,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x63,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x63,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x39,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x39,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x05,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x05,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x18,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x18,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x18,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x18,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x18,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x18,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + 
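An aside on reading these checks: the dpp8:[s0,...,s7] operand appears to map onto the tail of each expected encoding as eight 3-bit lane selectors packed LSB-first into a 24-bit field, stored little-endian immediately after the DPP8 source-register byte (0x01 for v1, 0xff for v255). The sketch below is illustrative only, packDpp8 is a hypothetical helper and not code from this patch, but it reproduces the two selector patterns used throughout this file: dpp8:[7,6,5,4,3,2,1,0] yields the trailing bytes 0x77,0x39,0x05 and dpp8:[0,0,0,0,0,0,0,0] yields 0x00,0x00,0x00.

#include <array>
#include <cassert>
#include <cstdint>

// Pack eight 3-bit DPP8 lane selectors, lane 0 in the low bits, and return
// the three little-endian bytes that close out each encoding above.
static std::array<uint8_t, 3> packDpp8(const std::array<unsigned, 8> &Sel) {
  uint32_t Bits = 0;
  for (unsigned Lane = 0; Lane < 8; ++Lane)
    Bits |= (Sel[Lane] & 7u) << (3 * Lane); // 3 bits per lane
  return {uint8_t(Bits & 0xff), uint8_t((Bits >> 8) & 0xff),
          uint8_t((Bits >> 16) & 0xff)};
}

int main() {
  // dpp8:[7,6,5,4,3,2,1,0] -> ...,0x77,0x39,0x05 in the GFX11 checks above.
  assert((packDpp8({7, 6, 5, 4, 3, 2, 1, 0}) ==
          std::array<uint8_t, 3>{0x77, 0x39, 0x05}));
  // dpp8:[0,0,0,0,0,0,0,0] -> ...,0x00,0x00,0x00.
  assert((packDpp8({0, 0, 0, 0, 0, 0, 0, 0}) ==
          std::array<uint8_t, 3>{0x00, 0x00, 0x00}));
  return 0;
}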
+v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x18,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x58,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x58,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x63,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x44,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x44,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x44,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x23,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x23,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, exec_lo 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x24,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x24,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x25,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x25,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, -1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x22,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x22,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x25,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + 
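Likewise, the only byte that differs between an fi:0 case and its fi:1 twin in the checks above is the fifth one: the magic src0 value that selects DPP8 mode. A one-line sketch (the helper name is invented here; the two values are simply read off the checks, 0xe9 for fi:0 or no fi, 0xea for fi:1):

#include <cstdint>

// Hypothetical helper: byte 4 of each *_e64_dpp encoding in this file.
constexpr uint8_t dpp8Src0Byte(bool FetchInactive) {
  return FetchInactive ? 0xea : 0xe9;
}

static_assert(dpp8Src0Byte(false) == 0xe9); // e.g. "...,0xe9,0x04,..."
static_assert(dpp8Src0Byte(true) == 0xea);  // e.g. "...,0xea,0x04,..." fi:1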
+v_xad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x45,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x45,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x40,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x40,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x64,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255, v255, v255 
op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u16_e64_dpp v5, 
v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + 
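These op_sel cases are easiest to audit by decoding the second byte of each check: the source op_sel bits land in bits 3..5, the destination op_sel bit in bit 6 (even for the two-source forms, where op_sel:[1,1,1] encodes as 0x58), clamp in bit 7, and the |abs| flags occupy bits 0..2 in the floating-point opcodes. A hedged sketch with a hypothetical helper name, validated against two of the checks above:

#include <cstdint>

// Builds byte 1 of a VOP3 e64_dpp encoding from the modifiers shown in this
// file: op_sel sources in bits 3..5, op_sel dst in bit 6, clamp in bit 7.
constexpr uint8_t vop3ModByte(bool OpSel0, bool OpSel1, bool OpSel2,
                              bool OpSelDst, bool Clamp) {
  return uint8_t((OpSel0 << 3) | (OpSel1 << 4) | (OpSel2 << 5) |
                 (OpSelDst << 6) | (Clamp << 7));
}

// v_med3_u16_e64_dpp ... op_sel:[1,1,1,1] is checked as [0x05,0x78,...]:
static_assert(vop3ModByte(true, true, true, true, false) == 0x78);
// v_mad_i16_e64_dpp v255 ... op_sel:[0,0,0,1] clamp as [0xff,0xc0,...]:
static_assert(vop3ModByte(false, false, false, true, true) == 0xc0);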
+v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp 
v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX11: encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index 2fb95663a2f85..446c08347b3a2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s
new file mode 100644
index 0000000000000..5329849855de3
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s
@@ -0,0 +1,7294 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_add3_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00]
+
+v_add3_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xff,0x05,0xa4,0x01]
+
+v_add3_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x01,0xfe,0xff,0x01]
+
+v_add3_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_add3_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_add3_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_add3_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_add3_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_add3_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7e,0x82,0xad,0x01]
+
+v_add3_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_add3_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_add3_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_add3_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_add3_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_add3_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_add_co_u32 v5, s6, v1, v2
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, v255, v255
+// W32: encoding: [0x05,0x06,0x00,0xd7,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, s1, s2
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, s105, s105
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, ttmp15, src_scc
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, m0, 0.5
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, exec_lo, -1
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s6, exec_hi, null
+// W32: encoding: [0x05,0x06,0x00,0xd7,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32 v5, s105, null, exec_lo
+// W32: encoding: [0x05,0x69,0x00,0xd7,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX12: encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x00] + +v_add_lshl_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xff,0x05,0xa4,0x01] + +v_add_lshl_u32 v5, s1, v255, exec_hi +// GFX12: 
encoding: [0x05,0x00,0x47,0xd6,0x01,0xfe,0xff,0x01] + +v_add_lshl_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x69,0xd2,0xf8,0x01] + +v_add_lshl_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add_lshl_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7b,0xfa,0xed,0x01] + +v_add_lshl_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add_lshl_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7e,0x82,0xad,0x01] + +v_add_lshl_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add_lshl_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add_lshl_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add_lshl_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xfd,0xd4,0x04,0x03] + +v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_nc_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_i32 v5, ttmp15, src_scc 
+// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp +// GFX12: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_u16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_u16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_u16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_u16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_u16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_u16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_u16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_u16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_u16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_u16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +// GFX12: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_alignbit_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] + +v_alignbit_b32 v5, v255, s2, s3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xff,0x05,0x0c,0x00] + +v_alignbit_b32 v5, s1, v255, s3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x01,0xfe,0x0f,0x00] + +v_alignbit_b32 v5, s105, s105, s105 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x69,0xd2,0xa4,0x01] + +v_alignbit_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x6a,0xf6,0x0c,0x04] + +v_alignbit_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_alignbit_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7b,0xfa,0xed,0x01] + +v_alignbit_b32 v5, m0, 0.5, exec_lo +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7d,0xe0,0xf9,0x01] + +v_alignbit_b32 v5, exec_lo, -1, m0 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7e,0x82,0xf5,0x01] + +v_alignbit_b32 v5, exec_hi, null, vcc_hi +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7f,0xf8,0xac,0x01] + +v_alignbit_b32 v5, null, exec_lo, vcc_lo +// GFX12: encoding: 
[0x05,0x00,0x16,0xd6,0x7c,0xfc,0xa8,0x01] + +v_alignbit_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xc1,0xfe,0xf4,0x03] + +v_alignbit_b32 v5, 0.5, m0, exec_hi +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xf0,0xfa,0xfc,0x01] + +v_alignbit_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xfd,0xd4,0x04,0x03] + +v_alignbit_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x16,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_alignbyte_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x00] + +v_alignbyte_b32 v5, v255, s2, s3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xff,0x05,0x0c,0x00] + +v_alignbyte_b32 v5, s1, v255, s3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x01,0xfe,0x0f,0x00] + +v_alignbyte_b32 v5, s105, s105, s105 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] + +v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] + +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] + +v_alignbyte_b32 v5, m0, 0.5, exec_lo +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7d,0xe0,0xf9,0x01] + +v_alignbyte_b32 v5, exec_lo, -1, m0 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7e,0x82,0xf5,0x01] + +v_alignbyte_b32 v5, exec_hi, null, vcc_hi +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7f,0xf8,0xac,0x01] + +v_alignbyte_b32 v5, null, exec_lo, vcc_lo +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7c,0xfc,0xa8,0x01] + +v_alignbyte_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xc1,0xfe,0xf4,0x03] + +v_alignbyte_b32 v5, 0.5, m0, exec_hi +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xf0,0xfa,0xfc,0x01] + +v_alignbyte_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xfd,0xd4,0x04,0x03] + +v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_and_b16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] + +v_and_b16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] + +v_and_b16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] + +v_and_b16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] + +v_and_b16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] + +v_and_b16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_and_b16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] + +v_and_b16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00] + +v_and_b16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] + +v_and_b16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] + +v_and_b16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] + +v_and_b16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] + +v_and_b16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00] + +v_and_b16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] + +v_and_b16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_and_or_b32 v5, v1, v2, s3 
+// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] + +v_and_or_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xff,0x05,0xa4,0x01] + +v_and_or_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x01,0xfe,0xff,0x01] + +v_and_or_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x69,0xd2,0xf8,0x01] + +v_and_or_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x6a,0xf6,0x0c,0x04] + +v_and_or_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7b,0xfa,0xed,0x01] + +v_and_or_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7d,0xe0,0xf5,0x01] + +v_and_or_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7e,0x82,0xad,0x01] + +v_and_or_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7f,0xf8,0xa8,0x01] + +v_and_or_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xc1,0xfe,0xf4,0x03] + +v_and_or_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xf0,0xfa,0xc0,0x03] + +v_and_or_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xfd,0xd4,0x04,0x03] + +v_and_or_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x57,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_ashrrev_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00] + +v_ashrrev_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xff,0xff,0x03,0x00] + +v_ashrrev_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x04,0x00,0x00] + +v_ashrrev_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x69,0xd2,0x00,0x00] + +v_ashrrev_i16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x6a,0xf6,0x00,0x00] + +v_ashrrev_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7b,0xfa,0x01,0x00] + +v_ashrrev_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7d,0xe0,0x01,0x00] + +v_ashrrev_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7e,0x82,0x01,0x00] + +v_ashrrev_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7f,0xf8,0x00,0x00] + +v_ashrrev_i16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7c,0xfc,0x00,0x00] + +v_ashrrev_i16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xc1,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xf0,0xfa,0x00,0x00] + +v_ashrrev_i16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xfd,0xd4,0x00,0x00] + +v_ashrrev_i16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i64 v[5:6], v1, vcc +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] + +v_ashrrev_i64 v[5:6], v255, exec +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xff,0xfd,0x00,0x00] + +v_ashrrev_i64 v[5:6], exec_lo, v[2:3] +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x7e,0x04,0x02,0x00] + +v_ashrrev_i64 v[5:6], exec_hi, v[254:255] +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x7f,0xfc,0x03,0x00] + +v_ashrrev_i64 v[5:6], null, null +// GFX12: encoding: 
[0x05,0x00,0x3e,0xd7,0x7c,0xf8,0x00,0x00] + +v_ashrrev_i64 v[5:6], -1, -1 +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xc1,0x82,0x01,0x00] + +v_ashrrev_i64 v[5:6], 0.5, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i64 v[5:6], src_scc, src_scc +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xfd,0xfa,0x01,0x00] + +v_ashrrev_i64 v[254:255], 0xaf123456, 0.5 +// GFX12: encoding: [0xfe,0x00,0x3e,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00] + +v_bcnt_u32_b32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xff,0xff,0x03,0x00] + +v_bcnt_u32_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x04,0x00,0x00] + +v_bcnt_u32_b32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x69,0xd2,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x6a,0xf6,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7b,0xfa,0x01,0x00] + +v_bcnt_u32_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7d,0xe0,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7e,0x82,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7f,0xf8,0x00,0x00] + +v_bcnt_u32_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7c,0xfc,0x00,0x00] + +v_bcnt_u32_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xc1,0xfe,0x00,0x00] + +v_bcnt_u32_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xf0,0xfa,0x00,0x00] + +v_bcnt_u32_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xfd,0xd4,0x00,0x00] + +v_bcnt_u32_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x1e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x11,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + 
+v_bfe_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x10,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x00] + +v_bfi_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xff,0x05,0xa4,0x01] + +v_bfi_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x01,0xfe,0xff,0x01] + +v_bfi_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfi_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfi_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfi_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfi_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7e,0x82,0xad,0x01] + +v_bfi_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfi_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfi_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfi_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfi_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x12,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00] + +v_bfm_b32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xff,0xff,0x03,0x00] + +v_bfm_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x04,0x00,0x00] + +v_bfm_b32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x69,0xd2,0x00,0x00] + +v_bfm_b32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x6a,0xf6,0x00,0x00] 
+ +v_bfm_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7b,0xfa,0x01,0x00] + +v_bfm_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7d,0xe0,0x01,0x00] + +v_bfm_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7e,0x82,0x01,0x00] + +v_bfm_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7f,0xf8,0x00,0x00] + +v_bfm_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7c,0xfc,0x00,0x00] + +v_bfm_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xc1,0xfe,0x00,0x00] + +v_bfm_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xf0,0xfa,0x00,0x00] + +v_bfm_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xfd,0xd4,0x00,0x00] + +v_bfm_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cndmask_b16 v5, v1, src_scc, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, v255, 0.5, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, s105, s105, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, vcc_hi, v2, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, m0, v255, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_hi, exec_hi, s3 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, null, m0, s105 +// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo +// W32: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, 0.5, -1, vcc_hi +// W32: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -|src_scc|, null, ttmp15 +// W32: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, v1, src_scc, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, v255, 0.5, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, s105, s105, s[6:7] +// W64: encoding: 
[0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, vcc_hi, v2, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, m0, v255, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, null, m0, s[6:7] +// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] +// W64: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, 0.5, -1, vcc +// W64: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] +// W64: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null +// GFX12: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_cubeid_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] + +v_cubeid_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0xa4,0x01] + +v_cubeid_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfe,0xff,0x01] + +v_cubeid_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubeid_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubeid_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubeid_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x0c,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubeid_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubeid_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x0c,0xd6,0x7e,0x82,0xad,0x01] + +v_cubeid_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x0c,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubeid_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x0c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubeid_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x0c,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubeid_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubeid_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x0c,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubeid_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x0c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cubema_f32 
v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x00] + +v_cubema_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0xff,0x05,0xa4,0x01] + +v_cubema_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x01,0xfe,0xff,0x01] + +v_cubema_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubema_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubema_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubema_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x0f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubema_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubema_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x0f,0xd6,0x7e,0x82,0xad,0x01] + +v_cubema_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x0f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubema_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x0f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubema_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x0f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubema_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubema_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x0f,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubema_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x0f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cubesc_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x00] + +v_cubesc_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0xff,0x05,0xa4,0x01] + +v_cubesc_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x01,0xfe,0xff,0x01] + +v_cubesc_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubesc_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubesc_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubesc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x0d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubesc_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubesc_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x0d,0xd6,0x7e,0x82,0xad,0x01] + +v_cubesc_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x0d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubesc_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x0d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubesc_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x0d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubesc_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubesc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x0d,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubesc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x0d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cubetc_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x00] + +v_cubetc_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0xff,0x05,0xa4,0x01] + +v_cubetc_f32 v5, s1, v255, exec_hi +// GFX12: encoding: 
[0x05,0x00,0x0e,0xd6,0x01,0xfe,0xff,0x01] + +v_cubetc_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x69,0xd2,0xf8,0x01] + +v_cubetc_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cubetc_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cubetc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x0e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_cubetc_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cubetc_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x0e,0xd6,0x7e,0x82,0xad,0x01] + +v_cubetc_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x0e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_cubetc_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x0e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_cubetc_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x0e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_cubetc_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x0e,0xd6,0xfd,0xd4,0x04,0x33] + +v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_cvt_pk_fp8_f32 v1, v2, v3 +// GFX12: encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_pk_fp8_f32 v1, -v2, |v3| +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_pk_fp8_f32 v1, s2, 3 +// GFX12: encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00] + +v_cvt_pk_bf8_f32 v1, v2, v3 +// GFX12: encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_pk_bf8_f32 v1, -v2, |v3| +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] + +v_cvt_pk_bf8_f32 v1, s2, 3 +// GFX12: encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v10, s2, v5 +// GFX12: encoding: [0x0a,0x00,0x6b,0xd7,0x02,0x0a,0x02,0x00] + +v_cvt_sr_fp8_f32 v5, -|v255|, v4 +// GFX12: encoding: [0x05,0x01,0x6b,0xd7,0xff,0x09,0x02,0x20] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 +// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6b,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v10, s2, v5 +// GFX12: encoding: [0x0a,0x00,0x6c,0xd7,0x02,0x0a,0x02,0x00] + +v_cvt_sr_bf8_f32 v5, -|v255|, v4 +// GFX12: encoding: [0x05,0x01,0x6c,0xd7,0xff,0x09,0x02,0x20] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6c,0xd7,0x02,0x07,0x02,0x00] + 
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 +// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6c,0xd7,0x02,0x07,0x02,0x00] + +v_cvt_pk_i16_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_i16_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_i16_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_i16_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_i16_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_i16_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_i16_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_i16_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_i16_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_i16_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_i32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_i16_i32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_i16_i32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_i16_i32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_i16_i32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_i16_i32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_i16_i32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_i16_i32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_i16_i32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_i16_i32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_i16_i32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] + +v_cvt_pk_i16_i32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] + +v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_i16_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_i16_f16 
v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX12: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX12: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_u16_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_u16_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_u16_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_u16_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: 
[0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u16_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_u16_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_u16_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_u16_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_u16_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_u16_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_u16_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u16_u32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_u16_u32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_u16_u32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_u16_u32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u16_u32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_u16_u32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_u16_u32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_u16_u32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_u16_u32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_u16_u32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_u16_u32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] + +v_cvt_pk_u16_u32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] + +v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u8_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] + +v_cvt_pk_u8_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] + +v_cvt_pk_u8_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] + +v_cvt_pk_u8_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] + +v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] + +v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] + +v_cvt_pk_u8_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] + +v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] + +v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo +// GFX12: 
encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] + +v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] + +v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] + +v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] + +v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null +// GFX12: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_i16_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX12: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_i16_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_i16_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_i16_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_i16_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, -1, exec_hi +// GFX12: encoding: 
[0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_i16_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_i16_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_u16_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX12: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_norm_u16_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_norm_u16_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_norm_u16_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_norm_u16_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] + 
+v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_div_fixup_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] + +v_div_fixup_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] + +v_div_fixup_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] + +v_div_fixup_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] + +v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] + +v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] + +v_div_fixup_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] + +v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] + +v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] + +v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] + +v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_div_fixup_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] + +v_div_fixup_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] + +v_div_fixup_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] + +v_div_fixup_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] + +v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] + +v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] + +v_div_fixup_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] + +v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] + +v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] + +v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: 
[0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] +// GFX12: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] + +v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] +// GFX12: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] + +v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] +// GFX12: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] + +v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| +// GFX12: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] + +v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| +// GFX12: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] + +v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null +// GFX12: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] + +v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| +// GFX12: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] + +v_div_fixup_f64 v[5:6], null, 0.5, vcc +// GFX12: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] + +v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 +// GFX12: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] + +v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 +// GFX12: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] + +v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 +// GFX12: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] + +v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01] + +v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15 +// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01] + +v_div_fmas_f32 v5, -|m0|, -|v255|, v3 +// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64] + +v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo| +// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1] + +v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255| +// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7] + +v_div_fmas_f32 v5, null, exec_hi, -|exec_hi| +// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81] + +v_div_fmas_f32 v5, -1, -|m0|, -|m0| +// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1] + +v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2 +// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b] + +v_div_fmas_f32 v5, vcc_lo, v2, v3 +// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] + +v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi +// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] + +v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 +// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] + +v_div_fmas_f32 v5, m0, 0.5, v255 +// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] + +v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| +// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] + +v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| +// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] + +v_div_fmas_f32 v5, null, m0, -|m0| +// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] + +v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| +// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] + +v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 +// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] + +v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_div_fmas_f32 v5, v255, src_scc, src_scc +// 
GFX12: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03]
+
+v_div_fmas_f32 v5, s105, s105, s105
+// GFX12: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_div_fmas_f32 v5, src_scc, -1, -1 mul:4
+// GFX12: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13]
+
+v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2
+// GFX12: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4]
+// GFX12: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04]
+
+v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105]
+// GFX12: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01]
+
+v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]|
+// GFX12: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7]
+
+v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX12: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1]
+
+v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null
+// GFX12: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61]
+
+v_div_fmas_f64 v[5:6], null, 0.5, -src_scc
+// GFX12: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83]
+
+v_div_fmas_f64 v[5:6], -1, -exec, |exec|
+// GFX12: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41]
+
+v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2
+// GFX12: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9]
+
+v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4
+// GFX12: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33]
+
+v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2
+// GFX12: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf]
+
+v_div_scale_f32 v5, vcc_lo, v1, v2, s3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, v255, s2, s105
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456)
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v1, v2, s3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v255, s2, s105
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s1, v255, exec_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s105, s105, exec_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, m0, 0.5, m0
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456)
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2
+// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
+// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_bf16_bf16 v5, v255, v255, s105
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
+
+v_dot2_bf16_bf16 v5, s1, s2, v3
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
+
+v_dot2_bf16_bf16 v5, s105, s105, m0
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
+
+v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+
+v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
+// GFX12: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
+
+v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
+// GFX12: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
+// GFX12: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
+
+v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
+// GFX12: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
+
+v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
+
+v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_f16_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
+
+v_dot2_f16_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
+
+v_dot2_f16_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_dot2_f16_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
+
+v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX12: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_fma_dx9_zero_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_dx9_zero_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_dx9_zero_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_dx9_zero_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_dx9_zero_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_fma_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4]
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04]
+
+v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7]
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00]
+
+v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255]
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07]
+
+v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]|
+// GFX12: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1]
+
+v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX12: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1]
+
+v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null
+// GFX12: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec|
+// GFX12: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1]
+
+v_fma_f64 v[5:6], null, 0.5, vcc
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01]
+
+v_fma_f64 v[5:6], -1, -1, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2
+// GFX12: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b]
+
+v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4
+// GFX12: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73]
+
+v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2
+// GFX12: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00]
+
+v_ldexp_f32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_ldexp_f32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_ldexp_f32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00]
+
+v_ldexp_f32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_ldexp_f32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_ldexp_f32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_ldexp_f32 v5, 0.5, m0 mul:2
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08]
+
+v_ldexp_f32 v5, src_scc, vcc_lo mul:4
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10]
+
+v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
+// GFX12: encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f64 v[5:6], v[1:2], v2
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], v255
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s2
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s105
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[254:255], ttmp15
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[2:3], vcc_hi
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[104:105], vcc_lo
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00]
+
+v_ldexp_f64 v[5:6], vcc, m0
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00]
+
+v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00]
+
+v_ldexp_f64 v[5:6], exec, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00]
+
+v_ldexp_f64 v[5:6], null, null
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_ldexp_f64 v[5:6], -1, -1
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00]
+
+v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08]
+
+v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4
+// GFX12: encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30]
+
+v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2
+// GFX12: encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lerp_u8 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lerp_u8 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lerp_u8 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lerp_u8 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lerp_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lerp_u8 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lerp_u8 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lerp_u8 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lerp_u8 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lerp_u8 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lerp_u8 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lerp_u8 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_add_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_add_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_add_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_add_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_add_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_add_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_add_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_add_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_add_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_add_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_or_b32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_or_b32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_or_b32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_or_b32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_or_b32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_or_b32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_or_b32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_or_b32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_or_b32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_or_b32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshlrev_b16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshlrev_b16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshlrev_b16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshlrev_b16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshlrev_b16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshlrev_b16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshlrev_b16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshlrev_b16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshlrev_b16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshrrev_b16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshrrev_b16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshrrev_b16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshrrev_b16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshrrev_b16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshrrev_b16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshrrev_b16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshrrev_b16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshrrev_b16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshrrev_b16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshrrev_b16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshrrev_b16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], v1, vcc
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], v255, exec
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], exec_lo, v[2:3]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00]
+
+v_lshrrev_b64 v[5:6], exec_hi, v[254:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00]
+
+v_lshrrev_b64 v[5:6], null, null
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], -1, -1
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00]
+
+v_lshrrev_b64 v[5:6], 0.5, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_lshrrev_b64 v[5:6], src_scc, src_scc
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00]
+
+v_lshrrev_b64 v[254:255], 0xaf123456, 0.5
+// GFX12: encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mad_i16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_i16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i16 v5, v1, v2, v3
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v255, v255, s3
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_i32_i16 v5, s1, s2, v255
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07]
+
+v_mad_i32_i16 v5, s105, s105, s105
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01]
+
+v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i32_i16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i32_i16 v5, exec_lo, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01]
+
+v_mad_i32_i16 v5, exec_hi, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01]
+
+v_mad_i32_i16 v5, null, exec_lo, null
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01]
+
+v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i16 v5, 0.5, m0, -1 op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0xf0,0xfa,0x04,0x03]
+
+v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
+// GFX12: encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i24 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_i32_i24 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i32_i24 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i32_i24 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i32_i24 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i32_i24 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i32_i24 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_i32_i24 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_i32_i24 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i24 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_i32_i24 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_i32_i24 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mad_co_i64_i32 v[5:6], s6, s105, s105, s[6:7]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x69,0xd2,0x18,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s6, ttmp15, ttmp15, s[104:105]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7b,0xf6,0xa0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s6, m0, 0.5, ttmp[14:15]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7d,0xe0,0xe9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s6, exec_lo, -1, exec
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7e,0x82,0xf9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s6, exec_hi, null, vcc
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7f,0xf8,0xa8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s105, null, exec_lo, null
+// W32: encoding: [0x05,0x69,0xff,0xd6,0x7c,0xfc,0xf0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], vcc_lo, -1, exec_hi, -1
+// W32: encoding: [0x05,0x6a,0xff,0xd6,0xc1,0xfe,0x04,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456
+// W32: encoding: [0x05,0x6b,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc
+// W32: encoding: [0x05,0x7b,0xff,0xd6,0xfd,0xd4,0xf4,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xf6,0xa0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[12:13], null, exec_lo, null
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1
+// W64: encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc
+// W64: encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp
+// GFX12: encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_u16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_u16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_u16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_u16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_u16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u16 v5, v1, v2, v3
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v255, v255, s3
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_u32_u16 v5, s1, s2, v255
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07]
+
+v_mad_u32_u16 v5, s105, s105, s105
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01]
+
+v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u32_u16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u32_u16 v5, exec_lo, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01]
+
+v_mad_u32_u16 v5, exec_hi, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01]
+
+v_mad_u32_u16 v5, null, exec_lo, null
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01]
+
+v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u16 v5, 0.5, m0, -1 op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x59,0xd6,0xf0,0xfa,0x04,0x03]
+
+v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
+// GFX12: encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u24 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_u32_u24 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_u32_u24 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_u32_u24 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_u32_u24 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u32_u24 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u32_u24 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_u32_u24 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_u32_u24 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u24 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_u32_u24 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_u32_u24 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mad_co_u64_u32 v[5:6], s6, s105, s105, s[6:7]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x69,0xd2,0x18,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s6, ttmp15, ttmp15, s[104:105]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7b,0xf6,0xa0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s6, m0, 0.5, ttmp[14:15]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7d,0xe0,0xe9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s6, exec_lo, -1, exec
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7e,0x82,0xf9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s6, exec_hi, null, vcc
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7f,0xf8,0xa8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s105, null, exec_lo, null
+// W32: encoding: [0x05,0x69,0xfe,0xd6,0x7c,0xfc,0xf0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], vcc_lo, -1, exec_hi, -1
+// W32: encoding: [0x05,0x6a,0xfe,0xd6,0xc1,0xfe,0x04,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456
+// W32: encoding: [0x05,0x6b,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc
+// W32: encoding: [0x05,0x7b,0xfe,0xd6,0xfd,0xd4,0xf4,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xf6,0xa0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[12:13], null, exec_lo, null
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1
+// W64: encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc
+// W64: encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_co_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp
+// GFX12: encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_num_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_num_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_num_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_num_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_num_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_num_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x2c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_max3_num_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_num_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x2c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x7d,0x2c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_max3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x04,0x2c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_max3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x0e,0x2c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_max3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x2c,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_max3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x2c,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_max3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc3,0x2c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_max3_num_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_num_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_num_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_num_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_num_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_num_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x2a,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_max3_num_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_num_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x2a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_num_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x2a,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_max3_num_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x2a,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_max3_num_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x2a,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_max3_num_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_max3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x2a,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_max3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x2a,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_max3_i16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_i16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_i16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_i16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_i16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_i16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_i16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_i16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_max3_i32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_i32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_i32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_i32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_i32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_i32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_i32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_i32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_i32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_i32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_i32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_i32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_i32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_i32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_max3_u16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_u16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_u16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_u16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_u16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_u16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_u16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_u16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_max3_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_max_i16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_i16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00]
+
+v_max_i16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00]
+
+v_max_i16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00]
+
+v_max_i16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_max_i16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_i16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_max_i16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_max_i16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00]
+
+v_max_i16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_max_i16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_max_i16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_max_i16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_max_i16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_max_i16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_u16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_u16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00]
+
+v_max_u16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00]
+
+v_max_u16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00]
+
+v_max_u16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_max_u16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_u16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_max_u16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_max_u16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00]
+
+v_max_u16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_max_u16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_max_u16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_max_u16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_max_u16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_max_u16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_num_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_num_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_num_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_num_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_num_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_num_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x6b,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_maxmin_num_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_num_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x6b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_num_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x6b,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_maxmin_num_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX12: encoding: [0x05,0x04,0x6b,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_num_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x6b,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_maxmin_num_f16 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_maxmin_num_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x6b,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_maxmin_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x6b,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_num_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_num_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_num_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_num_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_num_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_num_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x69,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_maxmin_num_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_num_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x69,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_num_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x69,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_maxmin_num_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x69,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_maxmin_num_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x69,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_maxmin_num_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x69,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_maxmin_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x69,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_maxmin_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x69,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_i32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_i32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_i32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_i32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_maxmin_i32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_i32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_i32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_maxmin_i32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_maxmin_i32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_maxmin_i32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_maxmin_i32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_maxmin_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_maxmin_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_maxmin_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_maxmin_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_maxmin_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_maxmin_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_maxmin_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_hi_u32_b32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00]
+
+v_mbcnt_hi_u32_b32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00]
+
+v_mbcnt_hi_u32_b32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mbcnt_hi_u32_b32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mbcnt_hi_u32_b32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mbcnt_hi_u32_b32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_lo_u32_b32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00]
+
+v_mbcnt_lo_u32_b32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00]
+
+v_mbcnt_lo_u32_b32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] + +v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_med3_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] + +v_med3_num_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x7d,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x04,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] + +v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] + +v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_med3_num_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_num_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_num_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_num_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_num_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_num_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: 
[0x05,0x07,0x31,0xd6,0x7b,0xfa,0xed,0xe1] + +v_med3_num_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_num_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x31,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_num_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x31,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_num_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x31,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_med3_num_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x31,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_num_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_med3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x31,0xd6,0xfd,0xd4,0x04,0x33] + +v_med3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x31,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_med3_i16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_i16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_i16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_med3_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] + 
+v_med3_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_med3_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_med3_u16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_u16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_med3_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_med3_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: 
[0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min3_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_num_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2b,0xd6,0x7b,0xfa,0xed,0xe1] + +v_min3_num_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_num_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x7d,0x2b,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_min3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x04,0x2b,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_min3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x0e,0x2b,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_min3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x2b,0xd6,0xf0,0xfa,0xc0,0x43] + +v_min3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x22,0x2b,0xd6,0xfd,0xd4,0x04,0x23] + +v_min3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc3,0x2b,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_min3_num_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_num_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_num_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_num_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_num_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_num_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x29,0xd6,0x7b,0xfa,0xed,0xe1] + +v_min3_num_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_num_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x29,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_num_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x29,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_min3_num_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x29,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_min3_num_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x29,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_min3_num_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x29,0xd6,0xf0,0xfa,0xc0,0x4b] + 
+v_min3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x29,0xd6,0xfd,0xd4,0x04,0x33] + +v_min3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x29,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_min3_i16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_i16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_i16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_min3_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_min3_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min3_u16 v5, v1, v2, s3 +// GFX12: encoding: 
[0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_u16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_min3_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_min3_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] + +v_min_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] + +v_min_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] + +v_min_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] + +v_min_i16 v5, 
vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] + +v_min_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] + +v_min_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7d,0xe0,0x01,0x00] + +v_min_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] + +v_min_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] + +v_min_i16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] + +v_min_i16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] + +v_min_i16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xf0,0xfa,0x00,0x00] + +v_min_i16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] + +v_min_i16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_min_u16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] + +v_min_u16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] + +v_min_u16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] + +v_min_u16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] + +v_min_u16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] + +v_min_u16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_u16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] + +v_min_u16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7d,0xe0,0x01,0x00] + +v_min_u16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] + +v_min_u16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] + +v_min_u16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] + +v_min_u16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] + +v_min_u16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xf0,0xfa,0x00,0x00] + +v_min_u16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] + +v_min_u16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_minmax_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_num_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_minmax_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6a,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minmax_num_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_num_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6a,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_num_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: 
[0x05,0x05,0x6a,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minmax_num_f16 v5, null, exec_lo, -|0xfe0b| +// GFX12: encoding: [0x05,0x04,0x6a,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_minmax_num_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6a,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minmax_num_f16 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minmax_num_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6a,0xd6,0xfd,0xd4,0x04,0x33] + +v_minmax_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6a,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +v_minmax_num_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_num_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_num_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_num_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_num_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_num_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x68,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minmax_num_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_num_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x68,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_num_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x68,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minmax_num_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x68,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minmax_num_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x68,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minmax_num_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x68,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minmax_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x68,0xd6,0xfd,0xd4,0x04,0x33] + +v_minmax_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x68,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] + +v_minmax_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] + +v_minmax_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + 
+v_minmax_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] + +v_minmax_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] + +v_minmax_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] + +v_minmax_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] + +v_minmax_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] + +v_minmax_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] + +v_minmax_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] + +v_minmax_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] + +v_minmax_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] + +v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] + +v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] + +v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] + +v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] + +v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] + 
+v_mqsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] + +v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX12: encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] + +v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], null, null, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] + +v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] + +v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] +// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] + +v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp +// GFX12: encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] + +v_msad_u8 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] + +v_msad_u8 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] + +v_msad_u8 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] + +v_msad_u8 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] + +v_msad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] + +v_msad_u8 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] + +v_msad_u8 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] + +v_msad_u8 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] + +v_msad_u8 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] + +v_msad_u8 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] + +v_msad_u8 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] + +v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX12: encoding: 
[0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] + +v_mul_hi_i32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] + +v_mul_hi_i32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] + +v_mul_hi_i32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_hi_i32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_hi_i32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_hi_i32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_hi_i32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_hi_i32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_hi_i32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_hi_i32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_hi_i32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_hi_i32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_hi_i32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] + +v_mul_hi_u32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] + +v_mul_hi_u32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] + +v_mul_hi_u32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_hi_u32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_hi_u32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_hi_u32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_hi_u32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_hi_u32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_hi_u32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_hi_u32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_hi_u32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_hi_u32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_hi_u32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_lo_u16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] + +v_mul_lo_u16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] + +v_mul_lo_u16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_lo_u16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_lo_u16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: 
[0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_lo_u16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_lo_u16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_lo_u16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_lo_u16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_lo_u16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_lo_u16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_lo_u16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_lo_u16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_lo_u16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_lo_u32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] + +v_mul_lo_u32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] + +v_mul_lo_u32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_lo_u32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_lo_u32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_lo_u32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_lo_u32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_lo_u32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_lo_u32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_lo_u32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_lo_u32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_lo_u32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_lo_u32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_lo_u32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] + +v_mullit_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] + +v_mullit_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] + +v_mullit_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] + +v_mullit_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] + +v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] + +v_mullit_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mullit_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] + +v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_mullit_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: 
[0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] + +v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] + +v_or3_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] + +v_or3_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] + +v_or3_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] + +v_or3_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] + +v_or3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] + +v_or3_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] + +v_or3_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] + +v_or3_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] + +v_or3_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] + +v_or3_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] + +v_or3_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] + +v_or3_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_or_b16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] + +v_or_b16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] + +v_or_b16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] + +v_or_b16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] + +v_or_b16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] + +v_or_b16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_or_b16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] + +v_or_b16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00] + +v_or_b16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] + +v_or_b16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] + +v_or_b16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] + +v_or_b16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] + +v_or_b16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00] + +v_or_b16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] + +v_or_b16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, v1, v2 +// GFX12: encoding: 
[0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] + +v_pack_b32_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] + +v_pack_b32_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] + +v_pack_b32_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] + +v_pack_b32_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] + +v_pack_b32_f16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] + +v_pack_b32_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] + +v_pack_b32_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] + +v_pack_b32_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] + +v_pack_b32_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] + +v_pack_b32_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] + +v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] + +v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX12: encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_perm_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] + +v_perm_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] + +v_perm_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] + +v_perm_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] + +v_perm_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] + +v_perm_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_perm_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] + +v_perm_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] + +v_perm_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] + +v_perm_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] + +v_perm_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_perm_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] + +v_perm_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] + +v_perm_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] + +v_perm_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, s3 +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane16_b32 v5, v1, s105, s105 +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane16_b32 v5, v1, ttmp15, ttmp15 +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane16_b32 v5, v1, vcc_hi, exec_lo +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane16_b32 v5, v1, vcc_lo, m0 +// GFX12: encoding: 
[0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane16_b32 v5, v1, m0, vcc_hi +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane16_b32 v5, v1, exec_hi, vcc_lo +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane16_b32 v5, v1, exec_lo, src_scc +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX12: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] + +v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] + +v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX12: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] + +v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX12: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] + +v_permlanex16_b32 v5, v1, s2, s3 +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] + +v_permlanex16_b32 v5, v1, s105, s105 +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlanex16_b32 v5, v1, ttmp15, ttmp15 +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] + +v_permlanex16_b32 v5, v1, vcc_hi, exec_lo +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlanex16_b32 v5, v1, vcc_lo, m0 +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlanex16_b32 v5, v1, m0, vcc_hi +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] + +v_permlanex16_b32 v5, v1, exec_hi, vcc_lo +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] + +v_permlanex16_b32 v5, v1, exec_lo, src_scc +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX12: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] + +v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] + +v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX12: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] + +v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX12: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] + +v_permlane16_var_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] + +v_permlane16_var_b32 v5, v1, v255 +// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00] + +v_permlane16_var_b32 v5, v255, v0 +// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0xff,0x01,0x02,0x00] + +v_permlane16_var_b32 v255, v1, v2 +// GFX12: encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] + +v_permlane16_var_b32 v5, v1, v50, op_sel:[1,1] +// GFX12: encoding: [0x05,0x18,0x0f,0xd7,0x01,0x65,0x02,0x00] + +v_permlane16_var_b32 v5, v1, v50, op_sel:[0,0] +// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0x65,0x02,0x00] + +v_permlane16_var_b32 v5, v1, v50, op_sel:[1,0] +// GFX12: encoding: [0x05,0x08,0x0f,0xd7,0x01,0x65,0x02,0x00] + +v_permlane16_var_b32 v255, v255, v0, op_sel:[0,1] +// GFX12: encoding: [0xff,0x10,0x0f,0xd7,0xff,0x01,0x02,0x00] + +v_permlanex16_var_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] + +v_permlanex16_var_b32 v5, v1, v105 +// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xd3,0x02,0x00] + +v_permlanex16_var_b32 v5, v1, v255 +// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00] + +v_permlanex16_var_b32 v255, v1, v2 +// GFX12: encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] + +v_permlanex16_var_b32 v1, v255, v2 +// GFX12: encoding: [0x01,0x00,0x10,0xd7,0xff,0x05,0x02,0x00] + +v_permlanex16_var_b32 v5, v1, v100, op_sel:[1,1] +// GFX12: 
encoding: [0x05,0x18,0x10,0xd7,0x01,0xc9,0x02,0x00] + +v_permlanex16_var_b32 v5, v1, v100, op_sel:[0,0] +// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xc9,0x02,0x00] + +v_permlanex16_var_b32 v5, v1, v100, op_sel:[1,0] +// GFX12: encoding: [0x05,0x08,0x10,0xd7,0x01,0xc9,0x02,0x00] + +v_permlanex16_var_b32 v255, v255, v100, op_sel:[0,1] +// GFX12: encoding: [0xff,0x10,0x10,0xd7,0xff,0xc9,0x02,0x00] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] + +v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] + +v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] + +v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] + +v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] + +v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] + +v_qsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] + +v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] + +v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] + +v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX12: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] + +v_readlane_b32 s5, v1, s2 +// GFX12: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] + +v_readlane_b32 s5, v1, s105 +// GFX12: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] + +v_readlane_b32 s105, v1, ttmp15 +// GFX12: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] + +v_readlane_b32 vcc_lo, v1, vcc_hi +// GFX12: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] + +v_readlane_b32 vcc_hi, v1, vcc_lo +// GFX12: encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] + +v_readlane_b32 ttmp15, v1, m0 +// GFX12: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] + +v_readlane_b32 null, v255, null +// GFX12: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] + +v_sad_hi_u8 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_hi_u8 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_hi_u8 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_hi_u8 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_hi_u8 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_hi_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_hi_u8 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: 
[0x05,0x00,0x23,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_hi_u8 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_hi_u8 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_hi_u8 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_hi_u8 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_hi_u8 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_hi_u8 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_hi_u8 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_hi_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX12: encoding: [0xff,0x80,0x23,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sad_u16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_sad_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u16 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u16 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u16 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u16 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u16 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u16 v255, 0xfe0b, vcc_hi, null clamp +// GFX12: encoding: [0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_sad_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: 
[0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp +// GFX12: encoding: [0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u8 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u8 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u8 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u8 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u8 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u8 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u8 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u8 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u8 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u8 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX12: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sub_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x01,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x01,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x01,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x01,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x01,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x01,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x01,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x01,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v255, null, 0xaf123456, vcc_hi 
clamp +// GFX12: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_i16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_i32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_i32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_i32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_i32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_i32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_i32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_i32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_i32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_i32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp +// GFX12: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_u16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_u16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_u16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_u16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_u16 v5, vcc_lo, ttmp15 +// GFX12: encoding: 
[0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_u16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_u16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_u16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_u16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_u16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_subrev_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x02,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x02,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x02,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x02,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: 
[0x05,0x7b,0x02,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x02,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x02,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x02,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX12: encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_trig_preop_f64 v[5:6], v[1:2], v2 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], v255 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xff,0x03,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], s2 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x00,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], s105 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xd3,0x00,0x00] + +v_trig_preop_f64 v[5:6], v[254:255], ttmp15 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0xfe,0xf7,0x00,0x00] + +v_trig_preop_f64 v[5:6], s[2:3], vcc_hi +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x02,0xd6,0x00,0x00] + +v_trig_preop_f64 v[5:6], s[104:105], vcc_lo +// GFX12: encoding: 
[0x05,0x00,0x2f,0xd7,0x68,0xd4,0x00,0x00] + +v_trig_preop_f64 v[5:6], vcc, m0 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x6a,0xfa,0x00,0x00] + +v_trig_preop_f64 v[5:6], ttmp[14:15], exec_hi +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x7a,0xfe,0x00,0x00] + +v_trig_preop_f64 v[5:6], exec, exec_lo +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x7e,0xfc,0x00,0x00] + +v_trig_preop_f64 v[5:6], null, null +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x7c,0xf8,0x00,0x00] + +v_trig_preop_f64 v[5:6], -1, -1 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0xc1,0x82,0x01,0x00] + +v_trig_preop_f64 v[5:6], 0.5, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0xf0,0xe0,0x01,0x08] + +v_trig_preop_f64 v[5:6], -|src_scc|, src_scc mul:4 +// GFX12: encoding: [0x05,0x01,0x2f,0xd7,0xfd,0xfa,0x01,0x30] + +v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 +// GFX12: encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] + +v_writelane_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] + +v_writelane_b32 v5, s105, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x69,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_lo, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x6a,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_hi, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x6b,0x04,0x00,0x00] + +v_writelane_b32 v5, ttmp15, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7b,0x04,0x00,0x00] + +v_writelane_b32 v5, m0, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7d,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_lo, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7e,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_hi, s105 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7f,0xd2,0x00,0x00] + +v_writelane_b32 v5, null, ttmp15 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7c,0xf6,0x00,0x00] + +v_writelane_b32 v5, -1, null +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0xc1,0xf8,0x00,0x00] + +v_writelane_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0xf0,0xfa,0x00,0x00] + +v_writelane_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0xfd,0xd4,0x00,0x00] + +v_writelane_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x61,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00] + +v_xad_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xff,0x05,0xa4,0x01] + +v_xad_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x01,0xfe,0xff,0x01] + +v_xad_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x69,0xd2,0xf8,0x01] + +v_xad_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7b,0xfa,0xed,0x01] + +v_xad_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xad_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7e,0x82,0xad,0x01] + +v_xad_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xad_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xad_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xf0,0xfa,0xc0,0x03] + 
+v_xad_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xfd,0xd4,0x04,0x03] + +v_xad_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] + +v_xor3_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xff,0x05,0xa4,0x01] + +v_xor3_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x01,0xfe,0xff,0x01] + +v_xor3_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x69,0xd2,0xf8,0x01] + +v_xor3_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xor3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7b,0xfa,0xed,0x01] + +v_xor3_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xor3_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7e,0x82,0xad,0x01] + +v_xor3_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xor3_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xor3_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xf0,0xfa,0xc0,0x03] + +v_xor3_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xfd,0xd4,0x04,0x03] + +v_xor3_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor_b16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] + +v_xor_b16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] + +v_xor_b16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] + +v_xor_b16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] + +v_xor_b16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] + +v_xor_b16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_xor_b16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] + +v_xor_b16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00] + +v_xor_b16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] + +v_xor_b16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] + +v_xor_b16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] + +v_xor_b16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] + +v_xor_b16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00] + +v_xor_b16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] + +v_xor_b16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_minimum_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] + +v_minimum_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xff,0xff,0x03,0x00] + +v_minimum_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x01,0x04,0x00,0x00] + +v_minimum_f32 v5, s105, s105 +// GFX12: encoding: 
[0x05,0x00,0x65,0xd7,0x69,0xd2,0x00,0x00] + +v_minimum_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x6a,0xf6,0x00,0x00] + +v_minimum_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_minimum_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7b,0xfa,0x01,0x00] + +v_minimum_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7d,0xe0,0x01,0x00] + +v_minimum_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x65,0xd7,0x7f,0xf8,0x00,0x00] + +v_minimum_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xc1,0xfe,0x00,0x00] + +v_minimum_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xf0,0xfa,0x00,0x40] + +v_minimum_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x65,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_maximum_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] + +v_maximum_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xff,0xff,0x03,0x00] + +v_maximum_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x01,0x04,0x00,0x00] + +v_maximum_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x69,0xd2,0x00,0x00] + +v_maximum_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x6a,0xf6,0x00,0x00] + +v_maximum_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_maximum_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7b,0xfa,0x01,0x00] + +v_maximum_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7d,0xe0,0x01,0x00] + +v_maximum_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x66,0xd7,0x7f,0xf8,0x00,0x00] + +v_maximum_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xc1,0xfe,0x00,0x00] + +v_maximum_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xf0,0xfa,0x00,0x40] + +v_maximum_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x66,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_minimum_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00] + +v_minimum_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00] + +v_minimum_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00] + +v_minimum_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00] + +v_minimum_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00] + +v_minimum_f16 v5, vcc_hi, 0xaf12 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] + +v_minimum_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00] + +v_minimum_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00] + +v_minimum_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00] 
+ +v_minimum_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00] + +v_minimum_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00] + +v_minimum_f16 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40] + +v_minimum_f16 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f16 v255, -|0xaf12|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] + +v_minimum_f16 v205, v201, v200 +// GFX12: encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00] + +v_maximum_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] + +v_maximum_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00] + +v_maximum_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00] + +v_maximum_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00] + +v_maximum_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00] + +v_maximum_f16 v5, vcc_hi, 0xaf12 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] + +v_maximum_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00] + +v_maximum_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00] + +v_maximum_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00] + +v_maximum_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00] + +v_maximum_f16 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40] + +v_maximum_f16 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f16 v255, -|0xaf12|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] + +v_maximum_f16 v205, v201, v200 +// GFX12: encoding: [0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00] + +v_minimum_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x01,0x07,0x02,0x00] + +v_minimum_f64 v[5:6], v[254:255], v[254:255] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xfe,0xfd,0x03,0x00] + +v_minimum_f64 v[5:6], s[6:7], s[4:5] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x06,0x08,0x00,0x00] + +v_minimum_f64 v[5:6], s[104:105], s[104:105] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x68,0xd0,0x00,0x00] + +v_minimum_f64 v[5:6], vcc, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xf4,0x00,0x00] + +v_minimum_f64 v[5:6], vcc, 0xaf121234 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] + +v_minimum_f64 v[5:6], ttmp[14:15], src_scc +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7a,0xfa,0x01,0x00] + +v_minimum_f64 v[5:6], vcc, 0.5 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xe0,0x01,0x00] + +v_minimum_f64 v[5:6], exec, -1 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f64 v[5:6], |exec|, null +// GFX12: encoding: [0x05,0x01,0x41,0xd7,0x7e,0xf8,0x00,0x00] + +v_minimum_f64 v[5:6], null, exec +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f64 v[5:6], -1, exec +// GFX12: encoding: 
[0x05,0x00,0x41,0xd7,0xc1,0xfc,0x00,0x00] + +v_minimum_f64 v[5:6], 0.5, -vcc +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xf0,0xd4,0x00,0x40] + +v_minimum_f64 v[5:6], -src_scc, |vcc| +// GFX12: encoding: [0x05,0x02,0x41,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f64 v[254:255], -|2|, -|vcc| +// GFX12: encoding: [0xfe,0x03,0x41,0xd7,0x82,0xd4,0x00,0x60] + +v_maximum_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x01,0x07,0x02,0x00] + +v_maximum_f64 v[5:6], v[254:255], v[254:255] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xfe,0xfd,0x03,0x00] + +v_maximum_f64 v[5:6], s[6:7], s[4:5] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x06,0x08,0x00,0x00] + +v_maximum_f64 v[5:6], s[104:105], s[104:105] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x68,0xd0,0x00,0x00] + +v_maximum_f64 v[5:6], vcc, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xf4,0x00,0x00] + +v_maximum_f64 v[5:6], vcc, 0xaf121234 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] + +v_maximum_f64 v[5:6], ttmp[14:15], src_scc +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7a,0xfa,0x01,0x00] + +v_maximum_f64 v[5:6], vcc, 0.5 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xe0,0x01,0x00] + +v_maximum_f64 v[5:6], exec, -1 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f64 v[5:6], |exec|, null +// GFX12: encoding: [0x05,0x01,0x42,0xd7,0x7e,0xf8,0x00,0x00] + +v_maximum_f64 v[5:6], null, exec +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f64 v[5:6], -1, exec +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xc1,0xfc,0x00,0x00] + +v_maximum_f64 v[5:6], 0.5, -vcc +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xf0,0xd4,0x00,0x40] + +v_maximum_f64 v[5:6], -src_scc, |vcc| +// GFX12: encoding: [0x05,0x02,0x42,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f64 v[254:255], -|2|, -|vcc| +// GFX12: encoding: [0xfe,0x03,0x42,0xd7,0x82,0xd4,0x00,0x60] + +v_minimum3_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x01,0x05,0x0e,0x00] + +v_minimum3_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0xff,0x05,0xa4,0x01] + +v_minimum3_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x01,0xfe,0xff,0x01] + +v_minimum3_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimum3_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimum3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minimum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimum3_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimum3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2d,0xd6,0x7e,0x82,0xad,0x01] + +v_minimum3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimum3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minimum3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimum3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minimum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2d,0xd6,0xfd,0xd4,0x04,0x33] + +v_minimum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: 
[0xff,0x83,0x2d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x01,0x05,0x0e,0x00] + +v_maximum3_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0xff,0x05,0xa4,0x01] + +v_maximum3_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x01,0xfe,0xff,0x01] + +v_maximum3_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximum3_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximum3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximum3_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximum3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2e,0xd6,0x7e,0x82,0xad,0x01] + +v_maximum3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximum3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximum3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maximum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2e,0xd6,0xfd,0xd4,0x04,0x33] + +v_maximum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x2e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minimum3_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x01,0x05,0x0e,0x00] + +v_minimum3_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0xff,0x05,0xa4,0x01] + +v_minimum3_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x01,0xfe,0xff,0x01] + +v_minimum3_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimum3_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimum3_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_minimum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimum3_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimum3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2f,0xd6,0x7e,0x82,0xad,0x01] + +v_minimum3_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimum3_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x2f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_minimum3_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimum3_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_minimum3_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x2f,0xd6,0xfd,0xd4,0x04,0x23] + +v_minimum3_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp +// GFX12: encoding: [0xff,0x83,0x2f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x01,0x05,0x0e,0x00] + +v_maximum3_f16 v5, v255, s2, s105 +// GFX12: 
encoding: [0x05,0x00,0x30,0xd6,0xff,0x05,0xa4,0x01] + +v_maximum3_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x01,0xfe,0xff,0x01] + +v_maximum3_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximum3_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximum3_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x30,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximum3_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximum3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x30,0xd6,0x7e,0x82,0xad,0x01] + +v_maximum3_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x30,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximum3_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x30,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x30,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximum3_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0xf0,0xfa,0xc0,0x43] + +v_maximum3_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x30,0xd6,0xfd,0xd4,0x04,0x23] + +v_maximumminimum_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x01,0x05,0x0e,0x00] + +v_maximumminimum_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0xff,0x05,0xa4,0x01] + +v_maximumminimum_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x01,0xfe,0xff,0x01] + +v_maximumminimum_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximumminimum_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximumminimum_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximumminimum_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximumminimum_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6d,0xd6,0x7e,0x82,0xad,0x01] + +v_maximumminimum_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximumminimum_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x6d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximumminimum_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maximumminimum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6d,0xd6,0xfd,0xd4,0x04,0x33] + +v_maximumminimum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x01,0x05,0x0e,0x00] + +v_minimummaximum_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0xff,0x05,0xa4,0x01] + +v_minimummaximum_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x01,0xfe,0xff,0x01] + +v_minimummaximum_f32 v5, s105, s105, exec_lo +// GFX12: encoding: 
[0x05,0x00,0x6c,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimummaximum_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimummaximum_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6c,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimummaximum_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimummaximum_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6c,0xd6,0x7e,0x82,0xad,0x01] + +v_minimummaximum_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6c,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimummaximum_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x6c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6c,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimummaximum_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minimummaximum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6c,0xd6,0xfd,0xd4,0x04,0x33] + +v_minimummaximum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x01,0x05,0x0e,0x00] + +v_maximumminimum_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0xff,0x05,0xa4,0x01] + +v_maximumminimum_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x01,0xfe,0xff,0x01] + +v_maximumminimum_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximumminimum_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximumminimum_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_maximumminimum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximumminimum_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximumminimum_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6f,0xd6,0x7e,0x82,0xad,0x01] + +v_maximumminimum_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximumminimum_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x6f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_maximumminimum_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximumminimum_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_maximumminimum_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x6f,0xd6,0xfd,0xd4,0x04,0x23] + +v_maximumminimum_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp +// GFX12: encoding: [0xff,0x83,0x6f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x01,0x05,0x0e,0x00] + +v_minimummaximum_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0xff,0x05,0xa4,0x01] + +v_minimummaximum_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x01,0xfe,0xff,0x01] + +v_minimummaximum_f16 v5, s105, s105, exec_lo +// GFX12: encoding: 
[0x05,0x00,0x6e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_minimummaximum_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_minimummaximum_f16 v5, vcc_hi, 0xaf12, v255
+// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00]
+
+v_minimummaximum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x6e,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_minimummaximum_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_minimummaximum_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x6e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_minimummaximum_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x6e,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_minimummaximum_f16 v5, null, exec_lo, -|0xaf12|
+// GFX12: encoding: [0x05,0x04,0x6e,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00]
+
+v_minimummaximum_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x6e,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_minimummaximum_f16 v5, 0.5, -m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_minimummaximum_f16 v5, -src_scc, |vcc_lo|, -1
+// GFX12: encoding: [0x05,0x02,0x6e,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_s_exp_f32 s5, s1
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, s105
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, m0
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, null
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, -1
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_exp_f32 s105, 0xaf123456
+// GFX12: encoding: [0x69,0x00,0x80,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_s_exp_f32 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_exp_f32 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x80,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x80,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_exp_f32 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_exp_f32 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_exp_f32 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_exp_f16 s5, s1
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, s105
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, m0
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, null
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, -1
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_exp_f16 s105, 0xaf12
+// GFX12: encoding: [0x69,0x00,0x81,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00]
+
+v_s_exp_f16 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_exp_f16 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x81,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x81,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_exp_f16 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_exp_f16 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_exp_f16 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_log_f32 s5, s1
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_log_f32 s5, s105
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_log_f32 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_log_f32 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_log_f32 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_log_f32 s5, m0
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_log_f32 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_log_f32 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_log_f32 s5, null
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_log_f32 s5, -1
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_log_f32 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_log_f32 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_log_f32 s105, 0xaf123456
+// GFX12: encoding: [0x69,0x00,0x82,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_s_log_f32 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_log_f32 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x82,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_log_f32 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x82,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_log_f32 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_log_f32 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_log_f32 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_log_f16 s5, s1
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_log_f16 s5, s105
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_log_f16 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_log_f16 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_log_f16 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_log_f16 s5, m0
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_log_f16 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_log_f16 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_log_f16 s5, null
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_log_f16 s5, -1
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_log_f16 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_log_f16 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_log_f16 s105, 0xaf12
+// GFX12: encoding: [0x69,0x00,0x83,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00]
+
+v_s_log_f16 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_log_f16 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x83,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_log_f16 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x83,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_log_f16 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_log_f16 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_log_f16 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_rcp_f32 s5, s1
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, s105
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, m0
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, null
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, -1
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_rcp_f32 s105, 0xaf123456
+// GFX12: encoding: [0x69,0x00,0x84,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_s_rcp_f32 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_rcp_f32 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x84,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x84,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rcp_f32 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_rcp_f32 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_rcp_f32 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_rcp_f16 s5, s1
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, s105
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, m0
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, null
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, -1
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_rcp_f16 s105, 0xaf12
+// GFX12: encoding: [0x69,0x00,0x85,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00]
+
+v_s_rcp_f16 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_rcp_f16 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x85,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x85,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rcp_f16 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_rcp_f16 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_rcp_f16 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_rsq_f32 s5, s1
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, s105
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, m0
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, null
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, -1
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_rsq_f32 s105, 0xaf123456
+// GFX12: encoding: [0x69,0x00,0x86,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_s_rsq_f32 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_rsq_f32 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x86,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x86,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rsq_f32 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_rsq_f32 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_rsq_f32 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_rsq_f16 s5, s1
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, s105
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, m0
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, null
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, -1
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_rsq_f16 s105, 0xaf12
+// GFX12: encoding: [0x69,0x00,0x87,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00]
+
+v_s_rsq_f16 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_rsq_f16 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x87,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x87,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_rsq_f16 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_rsq_f16 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_rsq_f16 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_sqrt_f32 s5, s1
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, s105
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, m0
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, null
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, -1
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s105, 0xaf123456
+// GFX12: encoding: [0x69,0x00,0x88,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_s_sqrt_f32 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_sqrt_f32 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x88,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x88,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_sqrt_f32 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_sqrt_f32 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_sqrt_f32 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x18]
+
+v_s_sqrt_f16 s5, s1
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, s105
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x69,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x6a,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x6b,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, ttmp15
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7b,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, m0
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7d,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, exec_lo
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7e,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, exec_hi
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7f,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, null
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7c,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, -1
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0xc1,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, 0.5
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0xf0,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, src_scc
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0xfd,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s105, 0xaf12
+// GFX12: encoding: [0x69,0x00,0x89,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00]
+
+v_s_sqrt_f16 s5, -s1
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x20]
+
+v_s_sqrt_f16 s5, |s1|
+// GFX12: encoding: [0x05,0x01,0x89,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, s1 clamp
+// GFX12: encoding: [0x05,0x80,0x89,0xd6,0x01,0x00,0x00,0x00]
+
+v_s_sqrt_f16 s5, s1 mul:2
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x08]
+
+v_s_sqrt_f16 s5, s1 mul:4
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x10]
+
+v_s_sqrt_f16 s5, s1 div:2
+// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x18]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index 484e73da199b3..3e99a6120bfdd 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32 v5, v1, v2, s3
 // GFX12: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s
new file mode 100644
index 0000000000000..9fe555fa46706
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s
@@ -0,0 +1,5764 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, s2 row_mirror
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3]
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0]
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3]
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3]
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff]
+
+v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd
+// GFX12: encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5]
+
+v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed]
+
+v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd
+// GFX12:
encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 
byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// 
GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp 
v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_u16_f32_e64_dpp 
v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: 
[0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + 
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: 
[0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, s2, v3 
quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// 
GFX12: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: 
[0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] + +v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: 
[0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// 
GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: 
[0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: 
[0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, -|v2|, 
exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// 
GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp 
v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + 
+v_max_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: 
[0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + 
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + 
+v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + 
+v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_med3_u32_e64_dpp v5, v1, 
v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi 
row_shl:15
+// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, s2 row_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, s2 row_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + 
+v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: 
[0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + 
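For reference, the op_sel and clamp variations exercised in the surrounding checks all land in byte 1 of these 12-byte encodings (bits 11-15 of the first dword). Below is a minimal Python sketch of that packing, inferred purely from the checks in this patch; the helper name opsel_byte is illustrative, not from the LLVM sources, and bits 0-2 of the same byte (the abs modifiers seen in entries such as -|v1|) are left out.

# Hypothetical packing of byte 1: source-operand op_sel bits at 3..5,
# destination half at bit 6, clamp at bit 7. Inferred from this patch.
def opsel_byte(op_sel, clamp=False):
    b = 0
    for i, s in enumerate(op_sel[:-1]):  # source operands -> bits 3..5
        b |= s << (3 + i)
    b |= op_sel[-1] << 6                 # destination half -> bit 6
    b |= int(clamp) << 7                 # clamp -> bit 7
    return b

assert opsel_byte([1, 1, 1]) == 0x58           # v_sub_nc_u16 op_sel:[1,1,1]
assert opsel_byte([0, 0, 1], clamp=True) == 0xc0
assert opsel_byte([1, 1, 1, 1]) == 0x78        # v_mad_u16 op_sel:[1,1,1,1]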
+v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX12: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] + +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_minimum_f32 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + 
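The trailing four bytes of each DPP16 check above form the DPP literal dword. The following is a minimal Python sketch of how its fields appear to be laid out, inferred from the encodings in this patch; decode_dpp16 is a hypothetical helper, not an LLVM API.

# Hypothetical decoder for the trailing DPP16 dword (bytes 8-11) of the
# 12-byte encodings above; field layout inferred from this patch's checks.
def decode_dpp16(enc):
    word = int.from_bytes(bytes(enc[8:12]), "little")
    return {
        "src0_vgpr": word & 0xFF,         # e.g. 0x01 -> v1
        "dpp_ctrl": (word >> 8) & 0x1FF,  # 0x140 row_mirror, 0x11f row_shr:15, ...
        "fi": (word >> 18) & 1,
        "bound_ctrl": (word >> 19) & 1,
        "bank_mask": (word >> 24) & 0xF,
        "row_mask": (word >> 28) & 0xF,
    }

# v_minimum_f32 v5, v1, v2 row_shr:15 from the checks above:
enc = [0x05, 0x00, 0x65, 0xd7, 0xfa, 0x04, 0x02, 0x00, 0x01, 0x1f, 0x01, 0xff]
assert decode_dpp16(enc)["dpp_ctrl"] == 0x11f  # row_shr:15
assert decode_dpp16(enc)["row_mask"] == 0xF and decode_dpp16(enc)["bank_mask"] == 0xF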
+v_minimum_f32 v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_minimum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_minimum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_minimum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f32 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_maximum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_maximum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_minimum_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_minimum_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_minimum_f16 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_ror:1 +// 
GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_minimum_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_minimum_f16 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_minimum_f16 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_minimum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_maximum_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f16 v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f16 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_maximum_f16 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_maximum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimum3_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + 
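The mul:2/mul:4/div:2 and negation variants in the v_minimum3/v_maximum3 checks below steer byte 7 of the encoding. A rough sketch follows, assuming omod sits in bits 59-60 and per-source neg in bits 61-63 of the VOP3 dwords (consistent with the checks here; byte7 and OMOD are illustrative names).

# Hypothetical sketch of byte 7 (bits 56-63): low 3 bits are the top of
# the src2 operand field, omod sits at bits 3-4, per-source neg at 5-7.
# Inferred from checks such as "-|v1|, v2, -|-1| mul:2 -> ...,0xab".
OMOD = {"": 0, "mul:2": 1, "mul:4": 2, "div:2": 3}

def byte7(src2_high3, omod, neg):   # neg = (src0, src1, src2)
    b = src2_high3 & 7
    b |= OMOD[omod] << 3
    for i, n in enumerate(neg):
        b |= n << (5 + i)
    return b

# src2 = -1 encodes as 0xc1, so its top three bits are 0xc1 >> 6:
assert byte7(0xc1 >> 6, "mul:2", (1, 0, 1)) == 0xab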
+v_minimum3_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minimum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimum3_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimum3_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimum3_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimum3_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimum3_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimum3_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_minimum3_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_maximum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximum3_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximum3_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximum3_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximum3_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: 
[0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximum3_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_maximum3_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximumminimum_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimummaximum_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimummaximum_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimummaximum_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimummaximum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximumminimum_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximumminimum_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximumminimum_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_maximumminimum_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 
v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimummaximum_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimummaximum_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimummaximum_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimummaximum_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimummaximum_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimummaximum_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_minimummaximum_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_minimummaximum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 91817b9029db3..14b489efc8d19 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR 
--implicit-check-not=error: %s v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s new file mode 100644 index 0000000000000..4622797357301 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s @@ -0,0 +1,3814 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s + +v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x55,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x55,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_add_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s105, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xfc,0x00,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x47,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x26,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x16,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x17,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x62,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x62,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x57,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x57,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x3a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x3a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x3a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_bcnt_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x11,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x11,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x10,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x10,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x12,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_bfi_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x12,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x0c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x0c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x0c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x0c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x0c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x0c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x0c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x0f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x0f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x0f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x0f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x0f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x0f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x0f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x0d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x0d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x0d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x0d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x0d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x0d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x0d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x0e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x0e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x0e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x0e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x0e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x0e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1]
+// GFX12: encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21]
+
+v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_sr_fp8_f32_e64_dpp v5, |v1|, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x01,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_sr_fp8_f32_e64_dpp v5, -v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_sr_fp8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: encoding: [0xff,0x01,0x6b,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32_e64_dpp v5, |v1|, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x01,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32_e64_dpp v5, -v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: encoding: [0xff,0x01,0x6c,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05]
+
+v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x06,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x06,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x06,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x24,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x07,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x07,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x07,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x23,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x23,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x26,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x01,0x26,0xd6,0xe9,0xfe,0xf7,0x23,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x21,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x21,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x21,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x21,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x22,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x22,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x13,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x13,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x13,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x13,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x13,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x13,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x13,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1c,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05]
+
+v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x81,0x1c,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x15,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x15,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x46,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x46,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x56,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x56,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x38,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x39,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x5a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x5a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x59,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x59,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2c,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x2c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2a,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2a,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2a,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2a,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2a,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2a,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x2a,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1d,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v255, v255, v255,
src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1e,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x09,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, 
v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6b,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6b,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6b,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6b,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6b,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x6b,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x69,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x69,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x69,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x69,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x69,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x69,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x69,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, s2, v3 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x64,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x64,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] 
+ +v_maxmin_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x62,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x20,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1f,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x32,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x32,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x31,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x31,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x31,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x31,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x31,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x31,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x31,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x20,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x20,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x21,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2b,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2b,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2b,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2b,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + 
+v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2b,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x2b,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x29,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x29,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x29,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x29,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x29,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x29,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x29,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + 
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + 
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0c,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0b,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + 
+v_minmax_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6a,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6a,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6a,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6a,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6a,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6a,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x6a,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x68,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x68,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x68,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x68,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x68,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x68,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x68,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x65,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x65,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x63,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x63,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x39,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x39,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x05,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x05,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x18,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x18,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x18,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x18,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x18,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x18,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x18,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x58,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x58,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x63,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + 
+v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x44,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x44,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x23,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x23,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x23,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x24,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x24,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x25,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x25,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x25,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x22,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x22,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x25,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_subrev_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x45,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x45,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x40,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x40,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x64,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + 
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + 
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x7c,0x2c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0b,0x2c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x15,0x2c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x26,0x2c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc7,0x2c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, -1 
op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x7c,0x2b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0b,0x2b,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x15,0x2b,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x26,0x2b,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc7,0x2b,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX12: 
[0x00,0x00,0x66,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] + +v_minimum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_minimum_f32 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x65,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_minimum_f32 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x65,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_minimum_f32 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x65,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_maximum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x66,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_maximum_f32 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x66,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_maximum_f32 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x66,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_maximum_f32 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_minimum_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x67,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_minimum_f16 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x67,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_minimum_f16 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x67,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_minimum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x67,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_maximum_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_maximum_f16 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x68,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_maximum_f16 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x68,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_maximum_f16 v255, -|v255|, -|v255| 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x68,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_minimum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x2d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maximum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: [0x05,0x05,0x2e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x2e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minimum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x2f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_maximum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x30,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x30,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x04,0x30,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x30,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x30,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x30,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x30,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_maximumminimum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minimummaximum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x6c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_maximumminimum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x6f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_minimummaximum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, v2, v255 
dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x6e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x6e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x6e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x6e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x6e,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x6e,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x6e,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index 3003d72b67968..2d912a4d1ad1f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/ARM/ltorg-range.s b/llvm/test/MC/ARM/ltorg-range.s
index 5c27d4cd0df26..88b9bb3cb5be8 100644
--- a/llvm/test/MC/ARM/ltorg-range.s
+++ b/llvm/test/MC/ARM/ltorg-range.s
@@ -1,5 +1,5 @@
 @ RUN: llvm-mc -triple armv7-unknown-linux-gnueabi -filetype obj -o - %s \
-@ RUN: | llvm-objdump -d - | FileCheck %s
+@ RUN: | llvm-objdump -d -z - | FileCheck %s
 
 ldr r0, =0x01020304
 @ CHECK: ldr
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 0c1d538a22750..c58b696e2d2e7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
 
 # GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00
@@ -411,49 +414,94 @@
 # GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
 0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
 
-# GFX11: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00
 
-# GFX11: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
 0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00
 
-# GFX11: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00
 
-# GFX11: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, s105, s105 ;
encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_and_b16 v5, m0, 0x3800
+# W32-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00
-# GFX11: v_and_b16 v5, 0x3800, m0
+# W32-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00
-# GFX11: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00
-# GFX11: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00]
@@ -4738,49 +4786,94 @@
# GFX11: v_or3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX11: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_or_b16 v5, m0, 0x3800
+# W32-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00
-# GFX11: v_or_b16 v5, 0x3800, m0
+# W32-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00
-# GFX11: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00
-# GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
# GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
@@ -5642,47 +5735,92 @@
# GFX11: v_xor3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX11: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00
-# GFX11: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00
-# GFX11: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00
-# GFX11: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX11: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00
-# GFX11: v_xor_b16 v5, m0, 0x3800
+# W32-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00
-# GFX11: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00
-# GFX11: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00
-# GFX11: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00
-# GFX11: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00
-# GFX11: v_xor_b16 v5, 0x3800, m0
+# W32-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00
-# GFX11: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00
-# GFX11: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 486243c450d67..bf3fa3bf65c74 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
# GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
@@ -266,46 +269,88 @@
# GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -2637,46 +2682,88 @@
# GFX11: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -3125,46 +3212,88 @@
# GFX11: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX11: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index e88aad3312757..cdbf798fd99c7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
# GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
@@ -164,10 +167,16 @@
# GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -1599,10 +1608,16 @@
# GFX11: v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00
-# GFX11: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -1901,10 +1916,16 @@
# GFX11: v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00
-# GFX11: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index 44cbe5f31b2cf..9b41b22b9012f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s
# GFX12: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00
@@ -375,49 +378,94 @@
# GFX12: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
-# GFX12: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00
-# GFX12: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00
-# GFX12: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00
-# GFX12: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
-# GFX12: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+#
W64-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_and_b16 v5, m0, 0x3800 +# W32-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_and_b16 v5, 0x3800, m0 +# W32-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] @@ -4597,49 +4645,107 @@ # GFX12: v_or3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX12: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] + +# W32-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] + +# W32-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: 
v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] + +# W32-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_or_b16 v5, m0, 0x3800 +# W32-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] + +# W32-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] 
0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_or_b16 v5, 0x3800, m0 +# W32-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] @@ -5555,49 +5661,107 @@ # GFX12: v_xor3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX12: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] + +# W32-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] + +# W32-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] 
0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] + +# W32-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_xor_b16 v5, m0, 0x3800 +# W32-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] + +# W32-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_xor_b16 v5, 0x3800, m0 +# W32-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 
0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_minimum_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index f6bb2e4a55282..f9efef4f4ebc3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -1,5 +1,8 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -281,46 +284,88 @@ # GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_and_b16_e64_dpp 
v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -867,7 +912,7 @@ # GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] 0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20 # GFX12: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] @@ -894,7 +939,7 @@ # GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] 0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20 # GFX12: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] @@ -2883,46 +2928,88 @@ # GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3392,46 +3479,88 @@ # GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: 
v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13
-# GFX12: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30
# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index f291795c8a627..47611e0b9708f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s
# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
@@ -179,10 +182,16 @@
# GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00
-# GFX12: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -1779,10 +1788,16 @@
# GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00
-# GFX12: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -2102,10 +2117,16 @@
# GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00
-# GFX12: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00
# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/RISCV/rv32zacas-invalid.s b/llvm/test/MC/RISCV/rv32zacas-invalid.s
index bad2edcaaa915..6927a2733b8e6 100644
--- a/llvm/test/MC/RISCV/rv32zacas-invalid.s
+++ b/llvm/test/MC/RISCV/rv32zacas-invalid.s
@@ -1,4 +1,4 @@
-# RUN: not llvm-mc -triple riscv32 -mattr=+a,+experimental-zacas < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -triple riscv32 -mattr=+a,+zacas < %s 2>&1 | FileCheck %s
# Non-zero offsets not supported for the third operand (rs1).
amocas.w a1, a3, 1(a5) # CHECK: :[[@LINE]]:18: error: optional integer offset must be 0
diff --git a/llvm/test/MC/RISCV/rv32zacas-valid.s b/llvm/test/MC/RISCV/rv32zacas-valid.s
index 8ba2b02542bc0..0e76f02399483 100644
--- a/llvm/test/MC/RISCV/rv32zacas-valid.s
+++ b/llvm/test/MC/RISCV/rv32zacas-valid.s
@@ -1,12 +1,12 @@
-# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \
# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \
# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+experimental-zacas < %s \
-# RUN:     | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zacas < %s \
+# RUN:     | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \
# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+experimental-zacas < %s \
-# RUN:     | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zacas < %s \
+# RUN:     | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \
# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
# RUN: not llvm-mc -triple=riscv32 -mattr=+a -show-encoding %s 2>&1 \
# RUN:     | FileCheck %s --check-prefix=CHECK-ERROR
diff --git a/llvm/test/MC/RISCV/rv64zacas-invalid.s b/llvm/test/MC/RISCV/rv64zacas-invalid.s
index 854e6fe308b0a..e75ff9e9f94ca 100644
--- a/llvm/test/MC/RISCV/rv64zacas-invalid.s
+++ b/llvm/test/MC/RISCV/rv64zacas-invalid.s
@@ -1,4 +1,4 @@
-# RUN: not llvm-mc -triple riscv64 -mattr=+a,+experimental-zacas < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+a,+zacas < %s 2>&1 | FileCheck %s
# Non-zero offsets not supported for the third operand (rs1).
amocas.w a1, a3, 1(a5) # CHECK: :[[@LINE]]:18: error: optional integer offset must be 0
diff --git a/llvm/test/MC/RISCV/rv64zacas-valid.s b/llvm/test/MC/RISCV/rv64zacas-valid.s
index d5044a0e0671d..595c70b6e3f5b 100644
--- a/llvm/test/MC/RISCV/rv64zacas-valid.s
+++ b/llvm/test/MC/RISCV/rv64zacas-valid.s
@@ -1,7 +1,7 @@
-# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \
# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+experimental-zacas < %s \
-# RUN:     | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zacas < %s \
+# RUN:     | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \
# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
# RUN: not llvm-mc -triple=riscv64 -mattr=+a -show-encoding %s 2>&1 \
# RUN:     | FileCheck %s --check-prefix=CHECK-ERROR
diff --git a/llvm/test/MC/RISCV/rvzabha-zacas-valid.s b/llvm/test/MC/RISCV/rvzabha-zacas-valid.s
index f8aa6867aedc6..97afb9d6563e5 100644
--- a/llvm/test/MC/RISCV/rvzabha-zacas-valid.s
+++ b/llvm/test/MC/RISCV/rvzabha-zacas-valid.s
@@ -1,12 +1,12 @@
-# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zabha,+experimental-zacas -riscv-no-aliases -show-encoding \
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zabha,+zacas -riscv-no-aliases -show-encoding \
# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zabha,+experimental-zacas -riscv-no-aliases -show-encoding \
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zabha,+zacas -riscv-no-aliases -show-encoding \
# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zabha,+experimental-zacas < %s \
-# RUN:     | llvm-objdump --mattr=+a,+zabha,+experimental-zacas -M no-aliases -d -r - \
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zabha,+zacas < %s \
+# RUN:     | llvm-objdump --mattr=+a,+zabha,+zacas -M no-aliases -d -r - \
# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
-# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zabha,+experimental-zacas < %s \
-# RUN:     | llvm-objdump --mattr=+a,+zabha,+experimental-zacas -M no-aliases -d -r - \
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zabha,+zacas < %s \
+# RUN:     | llvm-objdump --mattr=+a,+zabha,+zacas -M no-aliases -d -r - \
# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
# RUN: not llvm-mc -triple=riscv32 -mattr=+a,+zabha -show-encoding %s 2>&1 \
# RUN:     | FileCheck %s --check-prefix=CHECK-ERROR
diff --git a/llvm/test/MC/WebAssembly/type-checker-errors.s b/llvm/test/MC/WebAssembly/type-checker-errors.s
index e8b8274036a83..3106fe76c8449 100644
--- a/llvm/test/MC/WebAssembly/type-checker-errors.s
+++ b/llvm/test/MC/WebAssembly/type-checker-errors.s
@@ -93,12 +93,14 @@ global_set_type_mismatch:
table_get_expected_expression_operand:
  .functype table_get_expected_expression_operand () -> ()
+  i32.const 0
# CHECK: :[[@LINE+1]]:13: error: expected expression operand
  table.get 1
  end_function
table_get_missing_tabletype:
  .functype table_get_missing_tabletype () -> ()
+  i32.const 0
# CHECK: :[[@LINE+1]]:13: error: symbol foo: missing .tabletype
  table.get foo
  end_function
@@ -851,3 +853,23 @@ br_incorrect_func_signature:
  drop
  i32.const 1
  end_function
+
+multiple_errors_in_function:
+  .functype multiple_errors_in_function () -> ()
+# CHECK: :[[@LINE+2]]:3: error: empty stack while popping i32
+# CHECK: :[[@LINE+1]]:13: error: expected expression operand
+  table.get 1
+
+# CHECK: :[[@LINE+3]]:3: error: empty stack while popping i32
+# CHECK: :[[@LINE+2]]:3: error: empty stack while popping externref
+# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32
+  table.fill valid_table
+
+  f32.const 0.0
+  ref.null_extern
+# CHECK: :[[@LINE+2]]:3: error: popped externref, expected i32
+# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32
+  i32.add
+  drop
+
+  end_function
diff --git a/llvm/test/TableGen/listflatten-error.td b/llvm/test/TableGen/listflatten-error.td
index 56062420982a1..2f13356b6792f 100644
--- a/llvm/test/TableGen/listflatten-error.td
+++ b/llvm/test/TableGen/listflatten-error.td
@@ -1,6 +1,6 @@
-// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -DFILE=%s
-
-// CHECK: [[FILE]]:[[@LINE+2]]:33: error: expected list type argument in unary operator
-class Flatten<int A> {
-  list<int> F = !listflatten(A);
-}
+// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -DFILE=%s
+
+// CHECK: [[FILE]]:[[@LINE+2]]:33: error: expected list type argument in unary operator
+class Flatten<int A> {
+  list<int> F = !listflatten(A);
+}
diff --git a/llvm/test/TableGen/listflatten.td b/llvm/test/TableGen/listflatten.td
index bc9b1c71ea88d..a76ac21c4ad84 100644
--- a/llvm/test/TableGen/listflatten.td
+++ b/llvm/test/TableGen/listflatten.td
@@ -1,32 +1,32 @@
-// RUN: llvm-tblgen %s | FileCheck %s
-
-class Flatten<list<int> A, list<int> B> {
-  list<int> Flat1 = !listflatten([A, B, [6], [7, 8]]);
-
-  list<list<int>> X = [A, B];
-  list<int> Flat2 = !listflatten(!listconcat(X, [[7]]));
-
-  // Generate a nested list of integers.
-  list<int> Y0 = [1, 2, 3, 4];
-  list<list<int>> Y1 = !foreach(elem, Y0, [elem]);
-  list<list<list<int>>> Y2 = !foreach(elem, Y1, [elem]);
-  list<list<list<list<int>>>> Y3 = !foreach(elem, Y2, [elem]);
-
-  // Flatten it completely.
-  list<int> Flat3=!listflatten(!listflatten(!listflatten(Y3)));
-
-  // Flatten it partially.
-  list<list<list<int>>> Flat4 = !listflatten(Y3);
-  list<list<int>> Flat5 = !listflatten(!listflatten(Y3));
-
-  // Test NOP flattening.
-  list<string> Flat6 = !listflatten(["a", "b"]);
-}
-
-// CHECK: list<int> Flat1 = [1, 2, 3, 4, 5, 6, 7, 8];
-// CHECK: list<int> Flat2 = [1, 2, 3, 4, 5, 7];
-// CHECK: list<int> Flat3 = [1, 2, 3, 4];
-// CHECK{LITERAL}: list<list<list<int>>> Flat4 = [[[1]], [[2]], [[3]], [[4]]];
-// CHECK: list<string> Flat6 = ["a", "b"];
-def F : Flatten<[1,2], [3,4,5]>;
-
+// RUN: llvm-tblgen %s | FileCheck %s
+
+class Flatten<list<int> A, list<int> B> {
+  list<int> Flat1 = !listflatten([A, B, [6], [7, 8]]);
+
+  list<list<int>> X = [A, B];
+  list<int> Flat2 = !listflatten(!listconcat(X, [[7]]));
+
+  // Generate a nested list of integers.
+  list<int> Y0 = [1, 2, 3, 4];
+  list<list<int>> Y1 = !foreach(elem, Y0, [elem]);
+  list<list<list<int>>> Y2 = !foreach(elem, Y1, [elem]);
+  list<list<list<list<int>>>> Y3 = !foreach(elem, Y2, [elem]);
+
+  // Flatten it completely.
+  list<int> Flat3=!listflatten(!listflatten(!listflatten(Y3)));
+
+  // Flatten it partially.
+  list<list<list<int>>> Flat4 = !listflatten(Y3);
+  list<list<int>> Flat5 = !listflatten(!listflatten(Y3));
+
+  // Test NOP flattening.
+  list<string> Flat6 = !listflatten(["a", "b"]);
+}
+
+// CHECK: list<int> Flat1 = [1, 2, 3, 4, 5, 6, 7, 8];
+// CHECK: list<int> Flat2 = [1, 2, 3, 4, 5, 7];
+// CHECK: list<int> Flat3 = [1, 2, 3, 4];
+// CHECK{LITERAL}: list<list<list<int>>> Flat4 = [[[1]], [[2]], [[3]], [[4]]];
+// CHECK: list<string> Flat6 = ["a", "b"];
+def F : Flatten<[1,2], [3,4,5]>;
+
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
index c38f81d0f046e..cba1ba8dde768 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
@@ -300,3 +300,126 @@ define void @self-reference() {
end:
  ret void
}
+
+define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) {
+; CHECK-LABEL: @pr106083_invalidBBarg_fold(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[SEL_SI_UNFOLD_FALSE:%.*]]
+; CHECK: sel.si.unfold.false:
+; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 1, [[BB:%.*]] ]
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: BB1:
+; CHECK-NEXT: [[I:%.*]] = phi i16 [ 0, [[BB1_BACKEDGE:%.*]] ], [ 0, [[BB]] ], [ 1, [[BB7:%.*]] ], [ 0, [[SEL_SI_UNFOLD_FALSE]] ], [ 1, [[BB7_JT0:%.*]] ]
+; CHECK-NEXT: [[SEL_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[SEL_SI_UNFOLD_PHI]], [[BB1_BACKEDGE]] ], [ [[SEL_SI_UNFOLD_PHI]], [[BB7]] ], [ 0, [[BB]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SEL_SI_UNFOLD_FALSE]] ], [ [[SEL_SI_UNFOLD_PHI]], [[BB7_JT0]] ]
+; CHECK-NEXT: br i1 [[NOT:%.*]], label [[BB7_JT0]], label [[BB2:%.*]]
+; CHECK: BB2:
+; CHECK-NEXT: store i16 0, ptr [[D:%.*]], align 2
+; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]]
+; CHECK: spec.select.si.unfold.false:
+; CHECK-NEXT: br label [[BB7]]
+; CHECK: spec.select.si.unfold.false.jt0:
+; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[BB2]] ]
+; CHECK-NEXT: br label [[BB7_JT0]]
+; CHECK: BB7:
+; CHECK-NEXT: [[D_PROMOTED4:%.*]] = phi i16 [ 1, [[BB2]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] ]
+; CHECK-NEXT: [[_3:%.*]] = phi i32 [ [[SEL_SI_UNFOLD_PHI]], [[BB2]] ], [ poison, [[SPEC_SELECT_SI_UNFOLD_FALSE]] ]
+; CHECK-NEXT: switch i32 [[_3]], label [[BB1_BACKEDGE]] [
+; CHECK-NEXT: i32 0, label [[BB1]]
+; CHECK-NEXT: i32 1, label [[BB8:%.*]]
+; CHECK-NEXT: ]
+; CHECK: BB7.jt0:
+; CHECK-NEXT: [[D_PROMOTED4_JT0:%.*]] = phi i16 [ 0, [[BB1]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ]
+; CHECK-NEXT: [[_3_JT0:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ]
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: BB1.backedge:
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: BB8:
+; CHECK-NEXT: ret void
+;
+bb:
+  %sel = select i1 %cmp1, i32 0, i32 1
+  br label %BB1
+
+BB1:                                              ; preds = %BB1.backedge, %BB7, %bb
+  %i = phi i16 [ 0, %BB1.backedge ], [ 0, %bb ], [ 1, %BB7 ]
+  br i1 %not, label %BB7, label %BB2
+
+BB2:                                              ; preds = %BB1
+  store i16 0, ptr %d, align 2
+  %spec.select = select i1 %cmp2, i32 %sel, i32 0
+  br label %BB7
+
+BB7:                                              ; preds = %BB2, %BB1
+  %d.promoted4 = phi i16 [ 0, %BB1 ], [ 1, %BB2 ]
+  %_3 = phi i32 [ 0, %BB1 ], [ %spec.select, %BB2 ]
+  switch i32 %_3, label %BB1.backedge [
+    i32 0, label %BB1
+    i32 1, label %BB8
+  ]
+
+BB1.backedge:                                     ; preds = %BB7
+  br label %BB1
+
+BB8:                                              ; preds = %BB7
+  ret void
+}
+
+define void @pr106083_select_dead_uses(i1 %cmp1, i1 %not, ptr %p) {
+; CHECK-LABEL: @pr106083_select_dead_uses(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[DOTLOOPEXIT6:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]]
+; CHECK: spec.select.si.unfold.false:
+; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 1, [[BB:%.*]] ]
+; CHECK-NEXT: br label [[DOTLOOPEXIT6]]
+; CHECK: .loopexit6:
+; CHECK-NEXT: [[SPEC_SELECT_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[SPEC_SELECT_SI_UNFOLD_PHI]], [[SELECT_UNFOLD:%.*]] ], [ 0, [[BB]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SPEC_SELECT_SI_UNFOLD_FALSE]] ]
+; CHECK-NEXT: br i1 [[NOT:%.*]], label [[SELECT_UNFOLD_JT0:%.*]], label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[NOT2:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT: br i1 [[NOT2]], label [[SELECT_UNFOLD]], label [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0:%.*]]
+; CHECK: spec.select7.si.unfold.false:
+; CHECK-NEXT: br label [[SELECT_UNFOLD]]
+; CHECK: spec.select7.si.unfold.false.jt0:
+; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[BB1]] ]
+; CHECK-NEXT: br label [[SELECT_UNFOLD_JT0]]
+; CHECK: select.unfold:
+; CHECK-NEXT: [[_2:%.*]] = phi i32 [ [[SPEC_SELECT_SI_UNFOLD_PHI]], [[BB1]] ], [ poison, [[SPEC_SELECT7_SI_UNFOLD_FALSE:%.*]] ]
+; CHECK-NEXT: switch i32 [[_2]], label [[BB2:%.*]] [
+; CHECK-NEXT: i32 0, label [[DOTPREHEADER_PREHEADER:%.*]]
+; CHECK-NEXT: i32 1, label [[DOTLOOPEXIT6]]
+; CHECK-NEXT: ]
+; CHECK: select.unfold.jt0:
+; CHECK-NEXT: [[_2_JT0:%.*]] = phi i32 [ 0, [[DOTLOOPEXIT6]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0]] ]
+; CHECK-NEXT: br label [[DOTPREHEADER_PREHEADER]]
+; CHECK: .preheader.preheader:
+; CHECK-NEXT: ret void
+; CHECK: bb2:
+; CHECK-NEXT: unreachable
+;
+bb:
+  %spec.select = select i1 %cmp1, i32 0, i32 1
+  br label %.loopexit6
+
+.loopexit6:                                       ; preds = %select.unfold, %bb
+  br i1 %not, label %select.unfold, label %bb1
+
+bb1:                                              ; preds = %.loopexit6
+  %i = load i32, ptr %p, align 4
+  %not2 = icmp eq i32 0, 0
+  %spec.select7 = select i1 %not2, i32 %spec.select, i32 0
+  br label %select.unfold
+
+select.unfold:                                    ; preds = %bb1, %.loopexit6
+  %_2 = phi i32 [ 0, %.loopexit6 ], [ %spec.select7, %bb1 ]
+  switch i32 %_2, label %bb2 [
+    i32 0, label %.preheader.preheader
+    i32 1, label %.loopexit6
+  ]
+
+.preheader.preheader:                             ; preds = %select.unfold
+  ret void
+
+bb2:                                              ; preds = %select.unfold
+  unreachable
+}
diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c..4908fba62e3bf 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,16 @@
; REQUIRES: asserts
; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s
+;
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata
+;
+; Because we pass a contextual profile with a root defined in this module, we expect the outcome to be the same as if
+; we passed -avail-extern-to-local, i.e. available_externally functions don't get elided and instead get converted to local linkage.
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
+; If the profile doesn't apply to this module, available_externally functions won't get converted to internal linkage, and will be
+; removed instead.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP
declare void @call_out(ptr %fct)
@@ -12,13 +22,15 @@ define available_externally hidden void @g() {
  ret void
}
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
  call void @f()
  %f = load ptr, ptr @f
  call void @call_out(ptr %f)
  ret void
}
+
+!0 = !{i64 1234}
+
; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
; CHECK: declare hidden void @g()
; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +38,6 @@ define void @hello(ptr %g) {
; CHECK-NEXT: call void @call_out(ptr %f)
; CHECK: Statistics Collected
; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 11405a1c91158..90c209cf3f518 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -39,7 +39,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]>
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%n>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 6dfe5b608199b..c14a8bce8f48d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -23,7 +23,8 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
-; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%N>
+; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5b878108af59a..5f0b16048d40c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -685,10 +685,10 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]]
-; CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[MUL]], 2
-; CHECK-NEXT: [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64
-; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM19]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4
+; CHECK-NEXT: [[ADD14:%.*]] = or disjoint i32 [[MUL]], 1
+; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64
+; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]]
; CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3
; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]]
@@ -700,8 +700,8 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
-; CHECK-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM19]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX56]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]]
; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4
; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
@@ -715,12 +715,12 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <2 x i32> [[TMP8]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP9]], [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <4 x i32>
-; CHECK-NEXT: [[ARRAYIDX84:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 28
-; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT: [[MUL81:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX82:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 32
+; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX16]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX52]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32>
-; CHECK-NEXT: [[MUL85:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
; CHECK-NEXT: [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 44
; CHECK-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 36
@@ -728,8 +728,8 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4
; CHECK-NEXT: store i32 [[MUL73]], ptr [[Z]], align 4
; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[ARRAYIDX72]], align 4
-; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX84]], align 4
-; CHECK-NEXT: store i32 [[MUL85]], ptr [[ARRAYIDX76]], align 4
+; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX82]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX76]], align 4
; CHECK-NEXT: store i32 [[MUL87]], ptr [[ARRAYIDX88]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> poison, <2 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
index 9c086abe216c0..0fe4e6a5aa28b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
@@ -259,10 +259,12 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]],
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP11]]
-; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0)
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12)
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP11]], <16 x i8> [[TMP13]]
+; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr [[PTR]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
new file mode 100644
index 0000000000000..9cfc5f86cb014
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s
+
+define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
+; CHECK-LABEL: define i32 @pow2_zero_constant_shift(
+; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+  %t39.i0 = icmp eq i16 %a, 1
+  %t39.i1 = icmp eq i16 %b, 1
+  %t39.i2 = icmp eq i16 %c, 1
+  %t39.i3 = icmp eq i16 %d, 1
+  %t40.i0 = select i1 %t39.i0, i32 65536, i32 0
+  %t40.i1 = select i1 %t39.i1, i32 65536, i32 0
+  %t40.i2 = select i1 %t39.i2, i32 65536, i32 0
+  %t40.i3 = select i1 %t39.i3, i32 65536, i32 0
+  %or.rdx0 = or i32 %t40.i0, %t40.i1
+  %or.rdx1 = or i32 %t40.i2, %t40.i3
+  %or.rdx2 = or i32 %or.rdx0, %or.rdx1
+  ret i32 %or.rdx2
+}
+
+; TODO: This case is unprofitable, and we should not be vectorizing this.
+define i32 @pow2_zero_variable_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
+; CHECK-LABEL: define i32 @pow2_zero_variable_shift(
+; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[T39_I0:%.*]] = icmp eq i16 [[A]], 1
+; CHECK-NEXT: [[T39_I1:%.*]] = icmp eq i16 [[B]], 1
+; CHECK-NEXT: [[T39_I2:%.*]] = icmp eq i16 [[C]], 1
+; CHECK-NEXT: [[T39_I3:%.*]] = icmp eq i16 [[D]], 1
+; CHECK-NEXT: [[T40_I0:%.*]] = select i1 [[T39_I0]], i32 524288, i32 0
+; CHECK-NEXT: [[T40_I1:%.*]] = select i1 [[T39_I1]], i32 262144, i32 0
+; CHECK-NEXT: [[T40_I2:%.*]] = select i1 [[T39_I2]], i32 131072, i32 0
+; CHECK-NEXT: [[T40_I3:%.*]] = select i1 [[T39_I3]], i32 65536, i32 0
+; CHECK-NEXT: [[OR_RDX0:%.*]] = or i32 [[T40_I0]], [[T40_I1]]
+; CHECK-NEXT: [[OR_RDX1:%.*]] = or i32 [[T40_I2]], [[T40_I3]]
+; CHECK-NEXT: [[OR_RDX2:%.*]] = or i32 [[OR_RDX0]], [[OR_RDX1]]
+; CHECK-NEXT: ret i32 [[OR_RDX2]]
+;
+  %t39.i0 = icmp eq i16 %a, 1
+  %t39.i1 = icmp eq i16 %b, 1
+  %t39.i2 = icmp eq i16 %c, 1
+  %t39.i3 = icmp eq i16 %d, 1
+  %t40.i0 = select i1 %t39.i0, i32 524288, i32 0
+  %t40.i1 = select i1 %t39.i1, i32 262144, i32 0
+  %t40.i2 = select i1 %t39.i2, i32 131072, i32 0
+  %t40.i3 = select i1 %t39.i3, i32 65536, i32 0
+  %or.rdx0 = or i32 %t40.i0, %t40.i1
+  %or.rdx1 = or i32 %t40.i2, %t40.i3
+  %or.rdx2 = or i32 %or.rdx0, %or.rdx1
+  ret i32 %or.rdx2
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 9c22295a1c718..43c42c1ea2bfb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -12,12 +12,12 @@ define void @test() {
; CHECK-NEXT: ret void
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> , <2 x i32> [[TMP1]], i64 2)
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> , <2 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> , <2 x i32>
; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
index 813c5e7418b30..47b42bc8f32a7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
@@ -10,10 +10,10 @@ define void @foo() personality ptr @bar {
; CHECK: bb2.loopexit:
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP7:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret void
; CHECK: bb3:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ]
; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]]
; CHECK: bb4:
@@ -21,30 +21,29 @@ define void @foo() personality ptr @bar {
; CHECK: bb5:
; CHECK-NEXT: br label [[BB7:%.*]]
; CHECK: bb6:
-; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ , [[BB8:%.*]] ]
-; CHECK-NEXT: [[TMP4]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32>
+; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb7:
; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]]
; CHECK: bb8:
; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]]
; CHECK: bb9:
; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ]
-; CHECK-NEXT: [[TMP8]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP7]], i64 2)
+; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
+; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32>
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb10:
-; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
+; CHECK-NEXT: [[TMP8]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: br label [[BB9]]
; CHECK: bb11:
; CHECK-NEXT: ret void
; CHECK: bb12:
-; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], [[BB7]] ]
+; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ]
; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: br label [[BB9]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index 7201583f3450e..ec8bcc85e7db0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -144,35 +144,36 @@ define float @foo3(ptr nocapture readonly %A) #0 {
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00
; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]]
; CHECK-NEXT: [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX14]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
-; CHECK-NEXT: [[TMP9]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP11]], <2 x float> [[TMP8]], i64 2)
-; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[TMP12]],
-; CHECK-NEXT: [[TMP14]] = fadd <4 x float> [[TMP3]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP15]], 121
+; CHECK-NEXT: [[TMP7]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX14]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x float> [[TMP13]],
+; CHECK-NEXT: [[TMP15]] = fadd <4 x float> [[TMP3]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], 121
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[TMP14]], i32 0
-; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP14]], i32 1
-; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP14]], i32 2
-; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP14]], i32 3
-; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP19]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP15]], i32 0
+; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP15]], i32 1
+; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP15]], i32 2
+; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP15]], i32 3
+; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP20]]
; CHECK-NEXT: ret float [[ADD31]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 12389f4a3dbf4..6200e3ae43fc9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -315,11 +315,12 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> , <8 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
-; CHECK-NEXT: ret i1 [[TMP5]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> , <4 x i32> [[TMP2]], i64 4)
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
+; CHECK-NEXT: ret i1 [[TMP6]]
;
  %x0 = extractelement <8 x i32> %x, i32 0
  %x1 = extractelement <8 x i32> %x, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index 4ed52247c2ef3..b79ba458ef706 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -12,30 +12,25 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1
; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1
-; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2
-; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[SUB_I]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_I_I]] to i8
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SHR_1_I_I]] to i8
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[TMP4]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[SHR_2_I_I]] to i8
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[TMP6]], i32 3
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]],
-; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8>
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP12]], <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP15]],
-; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[TMP16]] to <8 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> poison, <16 x i32>
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP18]], <16 x i32>
-; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i8> [[TMP19]],
-; CHECK-NEXT: store <16 x i8> [[TMP20]], ptr undef, align 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]],
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP3]],
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]],
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SUB_I]] to i8
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> poison, i8 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8>
+; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP10]], <8 x i8> [[TMP11]], i64 8)
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
+; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4)
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2)
+; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]],
+; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1
; CHECK-NEXT: unreachable
; CHECK: if.end50.i:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
new file mode 100644
index 0000000000000..33fa00c1881da
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 < %s | FileCheck %s
+
+define i32 @a() {
+; CHECK-LABEL: define i32 @a(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[BB1]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP17:%.*]], %[[BB1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[TMP6]] = load <4 x i8>, ptr null, align 4
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <4 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP7]], <4 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i8> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> [[TMP11]], <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32>
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP19]], <8 x i8> [[TMP18]], <8 x i32>
+; CHECK-NEXT: [[TMP22:%.*]] = xor <8 x i8> [[TMP18]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i8> [[TMP22]], [[TMP5]]
+; CHECK-NEXT: store <8 x i8> [[TMP23]], ptr null, align 4
+; CHECK-NEXT: [[TMP17]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <2 x i32>
+; CHECK-NEXT: br label %[[BB1]]
+;
+  br label %1
+
+1:
+  %2 = phi i8 [ 0, %0 ], [ %40, %1 ]
+  %3 = phi i8 [ 0, %0 ], [ %28, %1 ]
+  %4 = phi i8 [ 0, %0 ], [ %16, %1 ]
+  %5 = phi i8 [ 0, %0 ], [ %6, %1 ]
+  %6 = load i8, ptr null, align 4
+  %7 = xor i8 %6, %3
+  %8 = xor i8 %7, %4
+  %9 = xor i8 %8, %5
+  store i8 %9, ptr null, align 4
+  %10 = xor i8 %6, %2
+  %11 = xor i8 %10, %5
+  %12 = add i64 0, 1
+  %13 = getelementptr i8, ptr null, i64 %12
+  store i8 %11, ptr %13, align 1
+  %14 = add i64 0, 1
+  %15 = getelementptr i8, ptr null, i64 %14
+  %16 = load i8, ptr %15, align 1
+  %17 = xor i8 %16, %2
+  %18 = xor i8 %17, %3
+  %19 = xor i8 %18, %4
+  %20 = add i64 0, 2
+  %21 = getelementptr i8, ptr null, i64 %20
+  store i8 %19, ptr %21, align 2
+  %22 = xor i8 %16, %6
+  %23 = xor i8 %22, %4
+  %24 = add i64 0, 3
+  %25 = getelementptr i8, ptr null, i64 %24
+  store i8 %23, ptr %25, align 1
+  %26 = add i64 0, 2
+  %27 = getelementptr i8, ptr null, i64 %26
+  %28 = load i8, ptr %27, align 2
+  %29 = xor i8 %28, %6
+  %30 = xor i8 %29, %2
+  %31 = xor i8 %30, %3
+  %32 = add i64 0, 4
+  %33 = getelementptr i8, ptr null, i64 %32
+  store i8 %31, ptr %33, align 4
+  %34 = xor i8 %28, %16
+  %35 = xor i8 %34, %3
+  %36 = add i64 0, 5
+  %37 = getelementptr i8, ptr null, i64 %36
+  store i8 %35, ptr %37, align 1
+  %38 = add i64 0, 3
+  %39 = getelementptr i8, ptr null, i64 %38
+  %40 = load i8, ptr %39, align 1
+  %41 = xor i8 %40, %16
+  %42 = xor i8 %41, %6
+  %43 = xor i8 %42, %2
+  %44 = add i64 0, 6
+  %45 = getelementptr i8, ptr null, i64 %44
+  store i8 %43, ptr %45, align 2
+  %46 = xor i8 %40, %28
+  %47 = xor i8 %46, %2
+  %48 = add i64 0, 7
+  %49 = getelementptr i8, ptr null, i64 %48
+  store i8 %47, ptr %49, align 1
+  br label %1
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/peek-through-shuffle.ll
similarity index 85%
rename from llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll
rename to llvm/test/Transforms/SLPVectorizer/peek-through-shuffle.ll
index c157f6117df95..839c1ebed6bcf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/peek-through-shuffle.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s
+; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -o - |
FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer < %s -mtriple=aarch64-unknown-linux-gnu -o - | FileCheck %s %} define void @foo(ptr %0, <4 x float> %1) { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll similarity index 94% rename from llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll rename to llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll index f376ca71c7769..2037e0d67d2f8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { ; CHECK-LABEL: define i32 @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll similarity index 96% rename from llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll rename to llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll index 3cc32c1fc7b28..b9802a0adb8aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-1000 -mtriple=x86_64 -S | FileCheck %s +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-1000 -mtriple=x86_64 -S | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-1000 -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %} ; The inputs to vector phi should remain undef. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll similarity index 90% rename from llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll rename to llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll index 488ca0b23cd9c..f6bed797b9ba9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll +++ b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %} define void @foo() { ; CHECK-LABEL: define void @foo() { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr31599-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/pr31599-inseltpoison.ll similarity index 78% rename from llvm/test/Transforms/SLPVectorizer/X86/pr31599-inseltpoison.ll rename to llvm/test/Transforms/SLPVectorizer/pr31599-inseltpoison.ll index 5506f61fe134b..fe5871d73cd5e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr31599-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/pr31599-inseltpoison.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define <2 x float> @foo() { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll b/llvm/test/Transforms/SLPVectorizer/pr31599.ll similarity index 78% rename from llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll rename to llvm/test/Transforms/SLPVectorizer/pr31599.ll index 348656e07c6be..10b9b224d556e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll +++ b/llvm/test/Transforms/SLPVectorizer/pr31599.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define <2 x float> @foo() { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll similarity index 85% rename from llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll rename to llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll index 03c8767eff327..f1034f3971135 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define void @tes() { ; CHECK-LABEL: define void @tes() { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-modified-values.ll b/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll similarity index 83% rename from llvm/test/Transforms/SLPVectorizer/X86/reduction-modified-values.ll rename to llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll index dbf490c5fe6a2..be9318e467174 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-modified-values.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i32 @test() { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 281b5f99540ea..4074b8654362e 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,21 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s ; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { +; RISCV-LABEL: @test( +; RISCV-NEXT: entry: +; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 +; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 +; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 +; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) +; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) +; RISCV-NEXT: ret i64 [[TMP6]] +; ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x 
i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll b/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll similarity index 93% rename from llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll rename to llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll index 1a6ff2385905b..561182d5e4f49 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64 -slp-threshold=-150 | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=x86_64 -slp-threshold=-150 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-150 | FileCheck %s %} define i1 @test(ptr %arg, ptr %i233, i64 %i241, ptr %i235, ptr %i237, ptr %i227) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/reordered-top-scalars.ll similarity index 83% rename from llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll rename to llvm/test/Transforms/SLPVectorizer/reordered-top-scalars.ll index 4517d27598b60..1de5ee2298837 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/reordered-top-scalars.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown %s -slp-threshold=-5 | FileCheck %s %} define i32 @test(ptr %isec) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll b/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll similarity index 93% rename from llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll rename to llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll index bc1eaaac5d1bb..a70daf9cf8d60 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S --passes=slp-vectorizer 
-mtriple=x86_64-unknown-linux < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux < %s | FileCheck %s %} @a = external global [32000 x float], align 64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll similarity index 94% rename from llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll rename to llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll index 2b425ee624700..3e00550a88521 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; CHECK-LABEL: define void @blam diff --git a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll new file mode 100644 index 0000000000000..965bfc7074c63 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s + +@b = external dso_local local_unnamed_addr global i64, align 8 +@d = external dso_local local_unnamed_addr global i32, align 4 +@c = external dso_local local_unnamed_addr global i32, align 4 +@a = external dso_local local_unnamed_addr global i8, align 2 + +define void @e() { +; CHECK-LABEL: @e( +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: [[C_PROMOTED5:%.*]] = load i32, ptr @c, align 4 +; CHECK-NEXT: [[A_PROMOTED7:%.*]] = load i8, ptr @a, align 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[C_PROMOTED5]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i8> , i8 [[A_PROMOTED7]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i1> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i1> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = zext <16 x i1> [[TMP12]] to <16 x i8> +; 
CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP0]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[C_PROMOTED5]], 81 +; CHECK-NEXT: store i64 -1, ptr @b, align 8 +; CHECK-NEXT: store i32 9, ptr @d, align 4 +; CHECK-NEXT: store i32 [[TMP17]], ptr @c, align 4 +; CHECK-NEXT: store i8 [[TMP16]], ptr @a, align 2 +; CHECK-NEXT: ret void +; +vector.ph: + %c.promoted5 = load i32, ptr @c, align 4 + %a.promoted7 = load i8, ptr @a, align 2 + %.splatinsert = insertelement <16 x i32> poison, i32 %c.promoted5, i64 0 + %.splat = shufflevector <16 x i32> %.splatinsert, <16 x i32> poison, <16 x i32> zeroinitializer + %0 = insertelement <16 x i8> , i8 %a.promoted7, i64 0 + %1 = add <16 x i32> %.splat, + %2 = add <16 x i32> %.splat, + %3 = add <16 x i32> %.splat, + %induction = add <16 x i32> %.splat, + %4 = icmp ult <16 x i32> %1, + %5 = icmp ult <16 x i32> %2, + %6 = icmp ult <16 x i32> %3, + %7 = icmp ult <16 x i32> %induction, + %8 = icmp eq <16 x i32> %.splat, + %9 = or <16 x i1> %4, %5 + %10 = or <16 x i1> %9, %6 + %11 = or <16 x i1> %10, %7 + %12 = or <16 x i1> %11, %8 + %13 = zext <16 x i1> %12 to <16 x i8> + %14 = or <16 x i8> %0, %13 + %15 = shufflevector <16 x i8> %14, <16 x i8> , <16 x i32> + %16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %15) + %17 = add i32 %c.promoted5, 81 + store i64 -1, ptr @b, align 8 + store i32 9, ptr @d, align 4 + store i32 %17, ptr @c, align 4 + store i8 %16, ptr @a, align 2 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll similarity index 86% rename from llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll rename to llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll index af46b4f576234..34c068478c5f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64 < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64 < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i1 @test() { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll b/llvm/test/Transforms/SLPVectorizer/same-scalar-in-same-phi-extract.ll similarity index 88% rename from llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll rename to llvm/test/Transforms/SLPVectorizer/same-scalar-in-same-phi-extract.ll index f1be11d0d0fc5..fe0813542f309 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/same-scalar-in-same-phi-extract.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if 
aarch64-registered-target %{ opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define void @test(i32 %arg) { ; CHECK-LABEL: define void @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scalarazied-result.ll b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll similarity index 60% rename from llvm/test/Transforms/SLPVectorizer/X86/scalarazied-result.ll rename to llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll index 1d6e191c6f97b..2570cdb45e1e7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scalarazied-result.ll +++ b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -S < %s | FileCheck %s %} define void @test() { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll similarity index 92% rename from llvm/test/Transforms/SLPVectorizer/X86/scalarization-overhead.ll rename to llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll index 55e155840f858..9f6b285f1ab90 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=x86_64-- -passes=slp-vectorizer -S < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -mtriple=x86_64-- -passes=slp-vectorizer -S < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -mtriple=aarch64-- -passes=slp-vectorizer -S < %s | FileCheck %s %} ; Crash Test case reported on D134605 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll similarity index 91% rename from llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll rename to llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll index 9e3ba05f88da8..2f0bd4a8f1315 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -o - -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -o - -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -o - -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} %class.e = type { i32, i32 } %struct.a = type { i32, i32, i32, i32 } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-multivector.ll similarity index 89% rename from llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll rename to llvm/test/Transforms/SLPVectorizer/shuffle-multivector.ll index c2555889f5981..2253c70dc2501 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll +++ b/llvm/test/Transforms/SLPVectorizer/shuffle-multivector.ll @@ -1,5 +1,6 @@ 
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-163 | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-163 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=aarch64-unknown-linux -slp-threshold=-163 | FileCheck %s %} define void @test1(i128 %p0, i128 %p1, i128 %p2, i128 %p3, <4 x i128> %vec) { ; CHECK-LABEL: @test1( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll b/llvm/test/Transforms/SLPVectorizer/shufflebuilder-bug.ll similarity index 89% rename from llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll rename to llvm/test/Transforms/SLPVectorizer/shufflebuilder-bug.ll index 9db7d696c7c7e..019c9eadd7c09 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/shufflebuilder-bug.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -p slp-vectorizer -mtriple=x86_64-- %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -p slp-vectorizer -mtriple=x86_64-- %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -p slp-vectorizer -mtriple=aarch64-unknown-linux-gnu %s | FileCheck %s %} define void @foo(<4 x float> %vec, float %val, ptr %ptr) { ; CHECK-LABEL: define void @foo diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll b/llvm/test/Transforms/SLPVectorizer/stores-non-ordered.ll similarity index 92% rename from llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll rename to llvm/test/Transforms/SLPVectorizer/stores-non-ordered.ll index a9748ca6291ae..aaa6be73056bd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll +++ b/llvm/test/Transforms/SLPVectorizer/stores-non-ordered.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-unknown -passes=slp-vectorizer -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s +; RUN: %if x86-registered-target %{ opt < %s -S -mtriple=x86_64-unknown -passes=slp-vectorizer -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -S -mtriple=aarch64-unknown -passes=slp-vectorizer -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s %} define i32 @non-ordered-stores(ptr noalias nocapture %in, ptr noalias nocapture %inn, ptr noalias nocapture %out) { ; CHECK-LABEL: @non-ordered-stores( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unknown-entries.ll b/llvm/test/Transforms/SLPVectorizer/unknown-entries.ll similarity index 82% rename from llvm/test/Transforms/SLPVectorizer/X86/unknown-entries.ll rename to llvm/test/Transforms/SLPVectorizer/unknown-entries.ll index fc22280c2b8ad..ca9aa451a9a3a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/unknown-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/unknown-entries.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s - -target triple = "x86_64-unknown-linux-gnu" +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer 
-mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %}

 define <3 x i64> @ahyes(i64 %position, i64 %value) {
 ; CHECK-LABEL: define <3 x i64> @ahyes(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll b/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll
similarity index 89%
rename from llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll
rename to llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll
index 7f086d17ca4c0..89fcc7e983749 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %}

 define i32 @test(i32 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: define i32 @test(
diff --git a/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll
new file mode 100644
index 0000000000000..c24fae7aa67bb
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=12 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-12 %s
+; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=11 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-11 %s
+; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=4 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-4 %s
+;
+; This test has the following CFG:
+; 1. entry has a switch to 4 blocks: B1 - B4
+; 2. B1 and B2 each branch conditionally to B6 or B5
+; 3. B3 and B4 each branch conditionally to B7 or B5
+; 4. In B5, %val is defined as a phi taking values from B1 - B4
+; 5. B5, B6, and B7 branch unconditionally to block Merge
+; 6. Block Merge has 5 phis (%x1 - %x4 and %val_merge).
+;
+; If we remove B5, %x1 - %x4 will increase the number of phi entries by (4 - 1) * 4 = 12. For %val_merge, since the value taken from B5
+; is defined in B5, it will not increase the number of phi entries (this can be considered as moving the entries from %val to
+; %val_merge). Therefore, removing B5 will increase the number of phi entries by 12 (not (4 - 1) * 5 = 15).
+;
+; If we remove B6 / B7, it will increase the number of phi entries by (2 - 1) * 5 = 5.
+;
+; In the first test, max-phi-entries-increase-after-removing-empty-block is set to 12, so B5 will be removed.
+; In the second test, max-phi-entries-increase-after-removing-empty-block is set to 11, so B5 should not be removed,
+; but B6 and B7 can be removed.
+; In the third test, max-phi-entries-increase-after-removing-empty-block is set to 4, so no BB can be removed.
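+;
+; In general, removing an empty block with P predecessors whose successor
+; contains N phis, K of which take a value defined in the removed block
+; itself, adds (P - 1) * (N - K) phi entries. Worked out for this CFG:
+; B5 gives (4 - 1) * (5 - 1) = 12, while B6 and B7 each give
+; (2 - 1) * (5 - 0) = 5.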
+; +define void @foo(i32 %a, i32 %val1, i32 %val2, i32 %val3, i32 %val4) { +; CHECK-12-LABEL: define void @foo( +; CHECK-12-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-12-NEXT: [[ENTRY:.*:]] +; CHECK-12-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-12-NEXT: i32 4, label %[[B4:.*]] +; CHECK-12-NEXT: i32 2, label %[[B2:.*]] +; CHECK-12-NEXT: i32 3, label %[[B3:.*]] +; CHECK-12-NEXT: ] +; CHECK-12: [[B1]]: +; CHECK-12-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-12-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[MERGE:.*]] +; CHECK-12: [[B2]]: +; CHECK-12-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-12-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[MERGE]] +; CHECK-12: [[B3]]: +; CHECK-12-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-12-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[MERGE]] +; CHECK-12: [[B4]]: +; CHECK-12-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-12-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[MERGE]] +; CHECK-12: [[B6]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[B7]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[MERGE]]: +; CHECK-12-NEXT: [[X1:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 1, %[[B4]] ], [ 1, %[[B3]] ], [ 1, %[[B2]] ], [ 1, %[[B1]] ] +; CHECK-12-NEXT: [[X2:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ], [ 2, %[[B2]] ], [ 2, %[[B1]] ] +; CHECK-12-NEXT: [[X3:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 3, %[[B4]] ], [ 3, %[[B3]] ], [ 3, %[[B2]] ], [ 3, %[[B1]] ] +; CHECK-12-NEXT: [[X4:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 4, %[[B4]] ], [ 4, %[[B3]] ], [ 4, %[[B2]] ], [ 4, %[[B1]] ] +; CHECK-12-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-12-NEXT: ret void +; +; CHECK-11-LABEL: define void @foo( +; CHECK-11-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-11-NEXT: [[ENTRY:.*:]] +; CHECK-11-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-11-NEXT: i32 4, label %[[B4:.*]] +; CHECK-11-NEXT: i32 2, label %[[B2:.*]] +; CHECK-11-NEXT: i32 3, label %[[B3:.*]] +; CHECK-11-NEXT: ] +; CHECK-11: [[B1]]: +; CHECK-11-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-11-NEXT: br i1 [[CMP1]], label %[[MERGE:.*]], label %[[B5:.*]] +; CHECK-11: [[B2]]: +; CHECK-11-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-11-NEXT: br i1 [[CMP2]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B3]]: +; CHECK-11-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-11-NEXT: br i1 [[CMP3]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B4]]: +; CHECK-11-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-11-NEXT: br i1 [[CMP4]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B5]]: +; CHECK-11-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-11-NEXT: br label %[[MERGE]] +; CHECK-11: [[MERGE]]: +; CHECK-11-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, 
%[[B3]] ] +; CHECK-11-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: ret void +; +; CHECK-4-LABEL: define void @foo( +; CHECK-4-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-4-NEXT: [[ENTRY:.*:]] +; CHECK-4-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-4-NEXT: i32 4, label %[[B4:.*]] +; CHECK-4-NEXT: i32 2, label %[[B2:.*]] +; CHECK-4-NEXT: i32 3, label %[[B3:.*]] +; CHECK-4-NEXT: ] +; CHECK-4: [[B1]]: +; CHECK-4-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-4-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[B5:.*]] +; CHECK-4: [[B2]]: +; CHECK-4-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-4-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[B5]] +; CHECK-4: [[B3]]: +; CHECK-4-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-4-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[B5]] +; CHECK-4: [[B4]]: +; CHECK-4-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-4-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[B5]] +; CHECK-4: [[B5]]: +; CHECK-4-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-4-NEXT: br label %[[MERGE:.*]] +; CHECK-4: [[B6]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[B7]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[MERGE]]: +; CHECK-4-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: ret void +; +entry: + switch i32 %a, label %B1 [ + i32 4, label %B4 + i32 2, label %B2 + i32 3, label %B3 + ] + +B1: ; preds = %entry + %cmp1 = icmp eq i32 %val1, 1 + br i1 %cmp1, label %B6, label %B5 + +B2: ; preds = %entry + %cmp2 = icmp eq i32 %val2, 2 + br i1 %cmp2, label %B6, label %B5 + +B3: ; preds = %entry + %cmp3 = icmp eq i32 %val3, 3 + br i1 %cmp3, label %B7, label %B5 + +B4: ; preds = %entry + %cmp4 = icmp eq i32 %val4, 4 + br i1 %cmp4, label %B7, label %B5 + +B5: ; preds = %B4, %B3, %B2, %B1 + %val = phi i32 [ %val1, %B1 ], [ %val2, %B2 ], [ %val3, %B3 ], [ %val4, %B4 ] + br label %Merge + +B6: ; preds = %B2, %B1 + br label %Merge + +B7: ; preds = %B4, %B3 + br label %Merge + +Merge: ; preds = %B7, %B6, %B5 + %x1 = phi i16 [ 1, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x2 = phi i16 [ 2, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x3 = phi i16 [ 3, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x4 = phi i16 [ 4, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %val_merge = phi i32 [ %val, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + ret void +} diff --git a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll new file mode 100644 index 0000000000000..cdf5ca569701b --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-- -passes=structurizecfg %s | FileCheck -check-prefix=OPT %s + +define amdgpu_ps i32 @if_else(i32 %0) { +; OPT-LABEL: define amdgpu_ps i32 @if_else( +; OPT-SAME: i32 [[TMP0:%.*]]) { +; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0 
+; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]] +; OPT: [[FLOW]]: +; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ] +; OPT-NEXT: br i1 [[TMP3]], label %[[TRUE:.*]], label %[[EXIT:.*]] +; OPT: [[TRUE]]: +; OPT-NEXT: br label %[[EXIT]] +; OPT: [[FALSE]]: +; OPT-NEXT: br label %[[FLOW]] +; OPT: [[EXIT]]: +; OPT-NEXT: [[RET:%.*]] = phi i32 [ [[TMP2]], %[[FLOW]] ], [ 42, %[[TRUE]] ] +; OPT-NEXT: ret i32 [[RET]] +; + %c = icmp eq i32 %0, 0 + br i1 %c, label %true, label %false, !prof !0 + +true: ; preds = %1 + br label %exit + +false: ; preds = %1 + br label %exit + +exit: ; preds = %false, %true + %ret = phi i32 [ 42, %true ], [ 33, %false ] + ret i32 %ret +} + +define amdgpu_ps void @loop_if_break(i32 %n) { +; OPT-LABEL: define amdgpu_ps void @loop_if_break( +; OPT-SAME: i32 [[N:%.*]]) { +; OPT-NEXT: [[ENTRY:.*]]: +; OPT-NEXT: br label %[[LOOP:.*]] +; OPT: [[LOOP]]: +; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ] +; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0 +; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]] +; OPT: [[LOOP_BODY]]: +; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1 +; OPT-NEXT: br label %[[FLOW]] +; OPT: [[FLOW]]: +; OPT-NEXT: [[TMP0]] = phi i32 [ [[I_NEXT]], %[[LOOP_BODY]] ], [ undef, %[[LOOP]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[LOOP_BODY]] ], [ true, %[[LOOP]] ] +; OPT-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP]] +; OPT: [[EXIT]]: +; OPT-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop_body, %entry + %i = phi i32 [ %n, %entry ], [ %i.next, %loop_body ] + %c = icmp ugt i32 %i, 0 + br i1 %c, label %loop_body, label %exit, !prof !0 + +loop_body: ; preds = %loop + %i.next = sub i32 %i, 1 + br label %loop + +exit: ; preds = %loop + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!0 = !{!"branch_weights", i32 1000, i32 1} +;. +; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000} +; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1} +;. 
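The PROF0/PROF1 checks above pin down the point of this new StructurizeCFG test: when the pass inverts a branch condition (the icmp eq in @if_else becomes icmp ne), the two branch_weights operands must swap so that each weight stays attached to the same semantic destination, while the uninverted loop branch in @loop_if_break keeps its weights in the original order. A minimal standalone sketch of that invariant follows; this is illustrative C++ only, not the pass's actual metadata-handling code:

    // Toy model of the invariant the test checks: logically negating a
    // two-way branch swaps its successors, so the profile weights attached
    // to them must swap as well.
    #include <cassert>
    #include <cstdint>
    #include <utility>

    struct BranchWeights {
      uint64_t TrueWeight;  // weight attached to the true successor
      uint64_t FalseWeight; // weight attached to the false successor
    };

    BranchWeights forInvertedCondition(BranchWeights W) {
      std::swap(W.TrueWeight, W.FalseWeight);
      return W;
    }

    int main() {
      // Matches the test: !{1000, 1} on the original branch becomes
      // !{1, 1000} once the condition is inverted (the PROF0 check).
      BranchWeights W = forInvertedCondition({1000, 1});
      assert(W.TrueWeight == 1 && W.FalseWeight == 1000);
    }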
diff --git a/llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test b/llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test
new file mode 100644
index 0000000000000..644eecd26d8af
--- /dev/null
+++ b/llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test
@@ -0,0 +1,213 @@
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: split-file %s %t
+# RUN: %python %t/all.py > %t/all.ll
+# RUN: sed 's@---TEMPORARY_DIR---@%{/t:regex_replacement}@' %t/debug.map.template > %t/debug.map
+# RUN: %llc_dwarf -mtriple x86_64-apple-macosx10.4.0 -o %t/all.o -filetype=obj %t/all.ll
+# RUN: dsymutil -f -y %t/debug.map -o - | llvm-dwarfdump -debug-line - | FileCheck %s
+# RUN: dsymutil --linker parallel -f -y %t/debug.map -o - | llvm-dwarfdump -debug-line - | tee %t/output.txt | FileCheck %s
+
+# CHECK: include_directories[255] = "/tmp/tmp.0HPkdttdoU/d254"
+# CHECK-NEXT: include_directories[256] = "/tmp/tmp.0HPkdttdoU/d255"
+# CHECK-NEXT: include_directories[257] = "/tmp/tmp.0HPkdttdoU/d256"
+
+# CHECK: dir_index: 255
+# CHECK: dir_index: 256
+# CHECK: dir_index: 257
+
+# Original file generated by doing the following (fish shell):
+# - for cnt in (seq 0 256); mkdir -p d$cnt ; printf "void func$cnt() {}\n#define FUNC$cnt func$cnt()\n" >> d$cnt/f$cnt.c ; end
+# - for cnt in (seq 0 256); printf "#include \"f$cnt.c\"" >> all.c ; end
+# - printf "void all() {\n" >> all.c
+# - for cnt in (seq 0 256); printf "FUNC$cnt;\n" >> all.c ; end
+# - printf "}\n" >> all.c
+# - clang -target x86_64-apple-macos -S -emit-llvm -gdwarf-5 -o all.ll all.c (for cnt in (seq 0 256); echo "-Id$cnt"; end)
+# - Edit all.ll manually and change all DIFile so the directory in the filename is
+#   moved into the directory field.
+# - Transformed into Python manually.

#--- all.py
import math
import string

PROLOGUE = string.Template("""\
; ModuleID = 'all.c'
source_filename = "all.c"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.4.0"
""")

FUNCTION = string.Template("""\
; Function Attrs: noinline nounwind optnone uwtable
define void @func$idx() #0 !dbg !$dbg_reference_subprogram {
  ret void, !dbg !$dbg_reference_location_ret
}
""")

ALL_FUNCTION_PROLOGUE = string.Template("""\
; Function Attrs: noinline nounwind optnone uwtable
define void @all() #0 !dbg !$dbg_reference_subprogram {
""")

ALL_FUNCTION_CALL = string.Template("""\
  call void @func$idx(), !dbg !$dbg_reference_location_call
""")

ALL_FUNCTION_EPILOGUE = string.Template("""\
  ret void, !dbg !$dbg_reference_location_ret
}
""")

DWARF_PROLOGUE = string.Template("""\
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
!llvm.ident = !{!8}

!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.1.6 (CentOS 18.1.6-3.el9)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
!1 = !DIFile(filename: "all.c", directory: "/tmp/tmp.0HPkdttdoU", checksumkind: CSK_MD5, checksum: "8b5068f097f0c272ddc808ed2d82cb12")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5
= !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"uwtable", i32 2} +!7 = !{i32 7, !"frame-pointer", i32 2} +!8 = !{!"clang version 18.1.6 (CentOS 18.1.6-3.el9)"} +""") + +DWARF_FUNCTION_WITH_TYPE = string.Template("""\ +!$dbg_reference_subprogram = distinct !DISubprogram(name: "func$idx", scope: !$dbg_reference_file, file: !$dbg_reference_file, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0) +!$dbg_reference_file = !DIFile(filename: "f$idx.c", directory: "/tmp/tmp.0HPkdttdoU/d$idx", checksumkind: CSK_MD5, checksum: "01234567890123456789012345678901") +!11 = !DISubroutineType(types: !12) +!12 = !{null} +!$dbg_reference_location = !DILocation(line: 1, column: $column, scope: !$dbg_reference_subprogram) +""") + +DWARF_FUNCTION = string.Template("""\ +!$dbg_reference_subprogram = distinct !DISubprogram(name: "func$idx", scope: !$dbg_reference_file, file: !$dbg_reference_file, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0) +!$dbg_reference_file = !DIFile(filename: "f$idx.c", directory: "/tmp/tmp.0HPkdttdoU/d$idx", checksumkind: CSK_MD5, checksum: "01234567890123456789012345678901") +!$dbg_reference_location = !DILocation(line: 1, column: $column, scope: !$dbg_reference_subprogram) +""") + +DWARF_ALL_FUNCTION_PROLOGUE = string.Template("""\ +!$dbg_reference_subprogram = distinct !DISubprogram(name: "all", scope: !1, file: !1, line: $line_number, type: !11, scopeLine: $line_number, spFlags: DISPFlagDefinition, unit: !0) +""") + +DWARF_ALL_FUNCTION_LOCATION = string.Template("""\ +!$dbg_reference_location = !DILocation(line: $line_number, column: 1, scope: !$dbg_reference_subprogram) +""") + +NUM_FUNCS = 257 + +dbg_reference_subprogram = 9 +dbg_reference_file = 10 +dbg_reference_location = 13 +column_base = 15 +functions = [] +dwarf_subprograms = [] + +first = True +for idx in range(NUM_FUNCS): + functions.append( + FUNCTION.substitute( + idx=idx, + dbg_reference_subprogram=dbg_reference_subprogram, + dbg_reference_location_ret=dbg_reference_location, + ) + ) + if first: + dwarf_subprograms.append( + DWARF_FUNCTION_WITH_TYPE.substitute( + idx=idx, + dbg_reference_subprogram=dbg_reference_subprogram, + dbg_reference_file=dbg_reference_file, + dbg_reference_location=dbg_reference_location, + column=column_base, + ) + ) + else: + dwarf_subprograms.append( + DWARF_FUNCTION.substitute( + idx=idx, + dbg_reference_subprogram=dbg_reference_subprogram, + dbg_reference_file=dbg_reference_file, + dbg_reference_location=dbg_reference_location, + column=column_base + math.floor(math.log10(idx)), + ) + ) + + dbg_reference_subprogram += 5 if first else 3 + dbg_reference_file += 5 if first else 3 + dbg_reference_location += 3 + first = False + +dbg_reference_location = dbg_reference_subprogram + 1 +line_number = 258 +all_function = [] +dwarf_all_subprogram = [] + +all_function.append( + ALL_FUNCTION_PROLOGUE.substitute( + dbg_reference_subprogram=dbg_reference_subprogram + ) +) +dwarf_all_subprogram.append( + DWARF_ALL_FUNCTION_PROLOGUE.substitute( + dbg_reference_subprogram=dbg_reference_subprogram, + line_number=line_number + ) +) +line_number += 1 + +for idx in range(NUM_FUNCS): + all_function.append( + ALL_FUNCTION_CALL.substitute( + idx=idx, + dbg_reference_location_call=dbg_reference_location, + ) + ) + dwarf_all_subprogram.append( + DWARF_ALL_FUNCTION_LOCATION.substitute( + dbg_reference_location=dbg_reference_location, + line_number=line_number, + dbg_reference_subprogram=dbg_reference_subprogram, + ) + ) + + dbg_reference_location += 1 + line_number += 1 
+
+all_function.append(
+    ALL_FUNCTION_EPILOGUE.substitute(
+        dbg_reference_location_ret=dbg_reference_location
+    )
+)
+dwarf_all_subprogram.append(
+    DWARF_ALL_FUNCTION_LOCATION.substitute(
+        dbg_reference_location=dbg_reference_location,
+        line_number=line_number,
+        dbg_reference_subprogram=dbg_reference_subprogram,
+    )
+)
+
+print(PROLOGUE.substitute())
+for function in functions:
+    print(function)
+for all_function_piece in all_function:
+    print(all_function_piece, end='')
+print()
+print(DWARF_PROLOGUE.substitute(), end='')
+for dwarf_subprogram in dwarf_subprograms:
+    print(dwarf_subprogram, end='')
+for dwarf_all_subprogram_piece in dwarf_all_subprogram:
+    print(dwarf_all_subprogram_piece, end='')
+print()
+
+#--- debug.map.template
+---
+triple: 'x86_64-apple-darwin'
+objects:
+  - filename: ---TEMPORARY_DIR---/all.o
+    symbols:
+      - { sym: _all, objAddr: 0x0, binAddr: 0x0, size: 0x0 }
+...
diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test
new file mode 100644
index 0000000000000..a56d056f8a225
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test
@@ -0,0 +1,66 @@
+## Test zero dumping when a data mapping symbol is active.
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t
+# RUN: llvm-objdump -t -d %t | FileCheck %s
+
+# CHECK: SYMBOL TABLE:
+# CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $d
+# CHECK-NEXT: 000000000000000c l .text 0000000000000000 $x
+# CHECK-NEXT: 0000000000000010 l .text 0000000000000000 $d
+
+# CHECK: 0000000000000000 <_start>:
+# CHECK-NEXT: ...
+# CHECK-NEXT: 8: 01 00 00 00 .word 0x00000001
+# CHECK-NEXT: c: d503201f nop
+# CHECK-NEXT: ...
+# CHECK-NEXT: 18: d503201f nop
+# CHECK-NEXT: ...
+# CHECK-NEXT: 2c: d503201f nop
+# CHECK-NEXT: ...
+# CHECK-NEXT: 48: d503201f nop
+
+# RUN: llvm-objdump -d -z %t | FileCheck %s --check-prefix=ZERO
+
+# ZERO: 0000000000000000 <_start>:
+# ZERO-NEXT: 0: 00 00 00 00 .word 0x00000000
+# ZERO-NEXT: 4: 00 00 00 00 .word 0x00000000
+# ZERO-NEXT: 8: 01 00 00 00 .word 0x00000001
+# ZERO-NEXT: c: d503201f nop
+# ZERO-NEXT: 10: 00 00 00 00 .word 0x00000000
+# ZERO-NEXT: 14: 00 00 00 00 .word 0x00000000
+# ZERO-NEXT: 18: d503201f nop
+
+## Check that we do not skip blocks of zeroes if relocations point to these places.
+# RUN: llvm-objdump -d -r %t | FileCheck %s --check-prefix=RELOC
+
+# RELOC: 0000000000000000 <_start>:
+# RELOC-NEXT: ...
+# RELOC-NEXT: 8: 01 00 00 00 .word 0x00000001
+# RELOC-NEXT: c: d503201f nop
+# RELOC-NEXT: ...
+# RELOC-NEXT: 18: d503201f nop
+# RELOC-NEXT: 1c: 00 00 00 00 .word 0x00000000
+# RELOC-NEXT: 000000000000001c: R_AARCH64_ABS64 x1
+# RELOC-NEXT: ...
+# RELOC-NEXT: 2c: d503201f nop
+# RELOC-NEXT: ...
+# RELOC-NEXT: 38: 00 00 00 00 .word 0x00000000
+# RELOC-NEXT: 0000000000000038: R_AARCH64_ABS64 x2
+# RELOC-NEXT: ...
+# RELOC-NEXT: 48: d503201f nop
+
+.globl _start
+_start:
+  .space 8
+  .long 1
+  nop
+  .space 8
+  nop
+
+  .quad x1
+  .space 8
+  nop
+
+  .space 8
+  .quad x2
+  .space 8
+  nop
diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test
new file mode 100644
index 0000000000000..8601343bd146e
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test
@@ -0,0 +1,47 @@
+## Test zero dumping when a data mapping symbol is active.
+# RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t +# RUN: llvm-objdump -t -d %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: 00000000 l .text 00000000 $d +# CHECK-NEXT: 0000000c l .text 00000000 $a +# CHECK-NEXT: 00000010 l .text 00000000 $d + +# CHECK: 00000000 <_start>: +# CHECK-NEXT: ... +# CHECK-NEXT: 8: 01 00 00 00 .word 0x00000001 +# CHECK-NEXT: c: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 18: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 28: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 40: e320f000 + +# RUN: llvm-objdump -d -z --triple=armv7 %t | FileCheck %s --check-prefix=ZERO + +# ZERO: 00000000 <_start>: +# ZERO-NEXT: 0: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 4: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 8: 01 00 00 00 .word 0x00000001 +# ZERO-NEXT: c: e320f000 nop +# ZERO-NEXT: 10: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 14: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 18: e320f000 nop + +.globl _start +_start: + .space 8 + .long 1 + nop + .space 8 + nop + + .long x1 + .space 8 + nop + + .space 8 + .long x2 + .space 8 + nop diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 0b175a3852e42..0377791d85b3f 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -1057,9 +1057,11 @@ static std::vector, bool>> runLTO() { getThinLTOOldAndNewSuffix(OldSuffix, NewSuffix); for (claimed_file &F : Modules) { - if (options::thinlto && !HandleToInputFile.count(F.leader_handle)) - HandleToInputFile.insert(std::make_pair( - F.leader_handle, std::make_unique(F.handle))); + if (options::thinlto) { + auto [It, Inserted] = HandleToInputFile.try_emplace(F.leader_handle); + if (Inserted) + It->second = std::make_unique(F.handle); + } // In case we are thin linking with a minimized bitcode file, ensure // the module paths encoded in the index reflect where the backends // will locate the full bitcode files for compiling/importing. diff --git a/llvm/tools/llvm-cov/SourceCoverageViewText.cpp b/llvm/tools/llvm-cov/SourceCoverageViewText.cpp index cab60c2d9034e..8b93b592910b3 100644 --- a/llvm/tools/llvm-cov/SourceCoverageViewText.cpp +++ b/llvm/tools/llvm-cov/SourceCoverageViewText.cpp @@ -179,7 +179,7 @@ void SourceCoverageViewText::renderLine(raw_ostream &OS, LineRef L, unsigned Col = 1; for (const auto *S : Segments) { unsigned End = std::min(S->Col, static_cast(Line.size()) + 1); - colored_ostream(OS, Highlight ? *Highlight : raw_ostream::SAVEDCOLOR, + colored_ostream(OS, Highlight.value_or(raw_ostream::SAVEDCOLOR), getOptions().Colors && Highlight, /*Bold=*/false, /*BG=*/true) << Line.substr(Col - 1, End - Col); @@ -196,7 +196,7 @@ void SourceCoverageViewText::renderLine(raw_ostream &OS, LineRef L, } // Show the rest of the line. - colored_ostream(OS, Highlight ? 
*Highlight : raw_ostream::SAVEDCOLOR, + colored_ostream(OS, Highlight.value_or(raw_ostream::SAVEDCOLOR), getOptions().Colors && Highlight, /*Bold=*/false, /*BG=*/true) << Line.substr(Col - 1, Line.size() - Col + 1); OS << '\n'; diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 4ee644f1e2906..5fc9a31ab4ad7 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -297,9 +297,8 @@ int main(int argc, char **argv) { Function *CF = CB->getCalledFunction(); if (!CF) continue; - if (CF->isDeclaration() || GVs.count(CF)) + if (CF->isDeclaration() || !GVs.insert(CF)) continue; - GVs.insert(CF); Workqueue.push_back(CF); } } diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp index 169f601d459e3..b76ea8dec0c98 100644 --- a/llvm/tools/llvm-ifs/llvm-ifs.cpp +++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp @@ -441,12 +441,9 @@ int llvm_ifs_main(int argc, char **argv, const llvm::ToolContext &) { } for (auto Symbol : TargetStub->Symbols) { - auto SI = SymbolMap.find(Symbol.Name); - if (SI == SymbolMap.end()) { - SymbolMap.insert( - std::pair(Symbol.Name, Symbol)); + auto [SI, Inserted] = SymbolMap.try_emplace(Symbol.Name, Symbol); + if (Inserted) continue; - } assert(Symbol.Name == SI->second.Name && "Symbol Names Must Match."); diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 317b6e20f64cf..34bb6ce30b766 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -330,8 +330,8 @@ static bool importFunctions(const char *argv0, Module &DestModule) { auto ModuleLoader = [&DestModule](const char *argv0, const std::string &Identifier) { - std::unique_ptr Buffer = - ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Identifier))); + std::unique_ptr Buffer = ExitOnErr(errorOrToExpected( + MemoryBuffer::getFileOrSTDIN(Identifier, /*IsText=*/true))); return loadFile(argv0, std::move(Buffer), DestModule.getContext(), false); }; @@ -402,7 +402,7 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L, // Similar to some flags, internalization doesn't apply to the first file. bool InternalizeLinkedSymbols = false; for (const auto &File : Files) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(File); + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true); // When we encounter a missing file, make sure we expose its name. 
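+    // Reading in text mode matters on platforms like z/OS, where text files
+    // are opened with automatic character-set conversion (the assumed
+    // motivation for the IsText flag here).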
   if (auto EC = BufferOrErr.getError())
diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp
index 7099390f24233..600bd6aa4d51e 100644
--- a/llvm/tools/llvm-objdump/SourcePrinter.cpp
+++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp
@@ -344,7 +344,8 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) {
   if (LineInfo.Source) {
     Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source);
   } else {
-    auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName);
+    auto BufferOrError =
+        MemoryBuffer::getFile(LineInfo.FileName, /*IsText=*/true);
     if (!BufferOrError) {
       if (MissingSources.insert(LineInfo.FileName).second)
         reportWarning("failed to find source " + LineInfo.FileName,
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index b69d14b4e7609..8073c898b8a14 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2244,27 +2244,28 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
         return false;
       };
 
+      // When -z or --disassemble-zeroes are given we always disassemble
+      // them. Otherwise we might want to skip zero bytes we see.
+      if (!DisassembleZeroes) {
+        uint64_t MaxOffset = End - Index;
+        // For --reloc: print zero blocks patched by relocations, so that
+        // relocations can be shown in the dump.
+        if (InlineRelocs && RelCur != RelEnd)
+          MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
+                               MaxOffset);
+
+        if (size_t N =
+                countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
+          FOS << "\t\t..." << '\n';
+          Index += N;
+          continue;
+        }
+      }
+
      if (DumpARMELFData) {
        Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
                              MappingSymbols, *DT->SubtargetInfo, FOS);
      } else {
-        // When -z or --disassemble-zeroes are given we always dissasemble
-        // them. Otherwise we might want to skip zero bytes we see.
-        if (!DisassembleZeroes) {
-          uint64_t MaxOffset = End - Index;
-          // For --reloc: print zero blocks patched by relocations, so that
-          // relocations can be shown in the dump.
-          if (InlineRelocs && RelCur != RelEnd)
-            MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
-                                 MaxOffset);
-
-          if (size_t N =
-                  countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
-            FOS << "\t\t..." << '\n';
-            Index += N;
-            continue;
-          }
-        }
        if (DumpTracebackTableForXCOFFFunction &&
            doesXCOFFTracebackTableBegin(Bytes.slice(Index, 4))) {
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 51214524adeef..4bc9d90095575 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -603,7 +603,7 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts,
   // Read and tokenize the input file.
  ErrorOr<std::unique_ptr<MemoryBuffer>> File =
-      MemoryBuffer::getFile(PreprocessedFile);
+      MemoryBuffer::getFile(PreprocessedFile, /*IsText=*/true);
  if (!File) {
    fatalError("Error opening file '" + Twine(PreprocessedFile) +
               "': " + File.getError().message());
@@ -682,7 +682,7 @@ void doCvtres(std::string Src, std::string Dest, std::string TargetTriple) {
  object::WindowsResourceParser Parser;

  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
-      MemoryBuffer::getFile(Src);
+      MemoryBuffer::getFile(Src, /*IsText=*/true);
  if (!BufferOrErr)
    fatalError("Error opening file '" + Twine(Src) +
               "': " + BufferOrErr.getError().message());
diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp
index 1f183975d9481..c287dac4cd239 100644
--- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp
+++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp
@@ -125,7 +125,7 @@ static std::unique_ptr<InterfaceFile> getInterfaceFile(const StringRef Filename,
                                                        bool ResetBanner = true) {
  ExitOnErr.setBanner(TOOLNAME + ": error: '" + Filename.str() + "' ");
  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
-      MemoryBuffer::getFile(Filename);
+      MemoryBuffer::getFile(Filename, /*IsText=*/true);
  if (BufferOrErr.getError())
    ExitOnErr(errorCodeToError(BufferOrErr.getError()));
  auto Buffer = std::move(*BufferOrErr);
diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
index 1510e9fb32007..5409b6dc7459d 100644
--- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
+++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
@@ -52,6 +52,11 @@ extern cl::OptionCategory LLVMReduceOptions;
 static cl::opt<std::string> TargetTriple("mtriple",
                                          cl::desc("Set the target triple"),
                                          cl::cat(LLVMReduceOptions));
+static cl::opt<bool> PrintInvalidMachineReductions(
+    "print-invalid-reduction-machine-verifier-errors",
+    cl::desc(
+        "Print machine verifier errors on invalid reduction attempts"),
+    cl::cat(LLVMReduceOptions));
 
 static cl::opt<bool> TmpFilesAsBitcode(
     "write-tmp-files-as-bitcode",
@@ -417,7 +422,7 @@ static std::unique_ptr<MachineFunction> cloneMF(MachineFunction *SrcMF,
   DstMRI->freezeReservedRegs();
 
-  DstMF->verify(nullptr, "", /*AbortOnError=*/true);
+  DstMF->verify(nullptr, "", &errs(), /*AbortOnError=*/true);
   return DstMF;
 }
 
@@ -450,8 +455,21 @@ bool ReducerWorkItem::verify(raw_fd_ostream *OS) const {
 
   for (const Function &F : getModule()) {
     if (const MachineFunction *MF = MMI->getMachineFunction(F)) {
-      if (!MF->verify(nullptr, "", /*AbortOnError=*/false))
+      // With the current state of quality, most reduction attempts fail the
+      // machine verifier. Avoid spamming large function dumps on nearly every
+      // attempt until the situation is better.
+      if (!MF->verify(nullptr, "",
+                      /*OS=*/PrintInvalidMachineReductions ?
&errs() : nullptr, + /*AbortOnError=*/false)) { + + if (!PrintInvalidMachineReductions) { + WithColor::warning(errs()) + << "reduction attempt on function '" << MF->getName() + << "' failed machine verifier (debug with " + "-print-invalid-reduction-machine-verifier-errors)\n"; + } return true; + } } } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp index 32fca80b5e5d6..02129263f6af4 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp @@ -41,10 +41,8 @@ reduceNodes(MDNode *Root, if (MDNode *Operand = dyn_cast(CurrentNode->getOperand(I).get())) { // Check whether node has been visited - if (!VisitedNodes.contains(Operand)) { + if (VisitedNodes.insert(Operand)) NodesToTraverse.push(Operand); - VisitedNodes.insert(Operand); - } // Delete the node only if it is distinct if (Operand->isDistinct()) { // Add to removal list @@ -74,10 +72,8 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, // If the node hasn't been traversed yet, add it to the queue of nodes to // traverse. if (MDTuple *TupleI = dyn_cast((*I))) { - if (!VisitedNodes.contains(TupleI)) { + if (VisitedNodes.insert(TupleI)) NodesToTraverse.push(TupleI); - VisitedNodes.insert(TupleI); - } } } @@ -113,12 +109,10 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, // Push the remaining nodes into the queue for (unsigned int I = 0; I < CurrentTuple->getNumOperands(); ++I) { MDTuple *Operand = dyn_cast(CurrentTuple->getOperand(I).get()); - if (Operand && !VisitedNodes.contains(Operand)) { - NodesToTraverse.push(Operand); + if (Operand && VisitedNodes.insert(Operand)) // If the node hasn't been traversed yet, add it to the queue of nodes // to traverse. - VisitedNodes.insert(Operand); - } + NodesToTraverse.push(Operand); } } } diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp index 8642be3127fed..d4305096b60a0 100644 --- a/llvm/tools/llvm-strings/llvm-strings.cpp +++ b/llvm/tools/llvm-strings/llvm-strings.cpp @@ -173,7 +173,7 @@ int main(int argc, char **argv) { for (const auto &File : InputFileNames) { ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(File); + MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true); if (std::error_code EC = Buffer.getError()) errs() << File << ": " << EC.message() << '\n'; else diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index dd51226e21311..80f9996ba705b 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -323,11 +323,10 @@ static void operator<<(json::OStream &W, for (const auto &Loc : Point->Locs) { if (Loc.FileName != FileName || Loc.FunctionName != FunctionName) continue; - if (WrittenIds.find(Point->Id) != WrittenIds.end()) + if (!WrittenIds.insert(Point->Id).second) continue; // Output : ":". 
-    WrittenIds.insert(Point->Id);
     W.attribute(Point->Id, (utostr(Loc.Line) + ":" + utostr(Loc.Column)));
   }
 
@@ -418,9 +417,6 @@ SymbolizedCoverage::read(const std::string &InputFile) {
       auto LineStr = Loc.substr(0, ColonPos);
       auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
 
-      if (Points.find(PointId) == Points.end())
-        Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
-
       DILineInfo LineInfo;
       LineInfo.FileName = Filename;
       LineInfo.FunctionName = FunctionName;
@@ -428,7 +424,8 @@ SymbolizedCoverage::read(const std::string &InputFile) {
       LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
       LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
 
-      CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
+      CoveragePoint *CoveragePoint =
+          &Points.try_emplace(PointId, PointId).first->second;
       CoveragePoint->Locs.push_back(LineInfo);
     }
   }
@@ -576,10 +573,8 @@ getCoveragePoints(const std::string &ObjectFile,
       FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
       if (Ig.isIgnorelisted(FrameInfo))
         continue;
-      if (Infos.find(FrameInfo) == Infos.end()) {
-        Infos.insert(FrameInfo);
+      if (Infos.insert(FrameInfo).second)
         Point.Locs.push_back(FrameInfo);
-      }
     }
 
     Result.push_back(Point);
diff --git a/llvm/unittests/ADT/SmallSetTest.cpp b/llvm/unittests/ADT/SmallSetTest.cpp
index b50b368ae6636..0fb20b19df925 100644
--- a/llvm/unittests/ADT/SmallSetTest.cpp
+++ b/llvm/unittests/ADT/SmallSetTest.cpp
@@ -41,6 +41,40 @@ TEST(SmallSetTest, Insert) {
   EXPECT_EQ(0u, s1.count(4));
 }
 
+TEST(SmallSetTest, InsertPerfectFwd) {
+  struct Value {
+    int Key;
+    bool Moved;
+
+    Value(int Key) : Key(Key), Moved(false) {}
+    Value(const Value &) = default;
+    Value(Value &&Other) : Key(Other.Key), Moved(false) { Other.Moved = true; }
+    bool operator==(const Value &Other) const { return Key == Other.Key; }
+    bool operator<(const Value &Other) const { return Key < Other.Key; }
+  };
+
+  {
+    SmallSet S;
+    Value V1(1), V2(2);
+
+    S.insert(V1);
+    EXPECT_EQ(V1.Moved, false);
+
+    S.insert(std::move(V2));
+    EXPECT_EQ(V2.Moved, true);
+  }
+  {
+    SmallSet S;
+    Value V1(1), V2(2);
+
+    S.insert(V1);
+    EXPECT_EQ(V1.Moved, false);
+
+    S.insert(std::move(V2));
+    EXPECT_EQ(V2.Moved, true);
+  }
+}
+
 TEST(SmallSetTest, Grow) {
   SmallSet<int, 4> s1;
diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt
index 633166221c690..e5c8630f3eed7 100644
--- a/llvm/unittests/IR/CMakeLists.txt
+++ b/llvm/unittests/IR/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_unittest(IRTests
   BasicBlockTest.cpp
   BasicBlockDbgInfoTest.cpp
   CFGBuilder.cpp
+  ConstantFPRangeTest.cpp
   ConstantRangeTest.cpp
   ConstantRangeListTest.cpp
   ConstantsTest.cpp
diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp
new file mode 100644
index 0000000000000..722e6566730da
--- /dev/null
+++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp
@@ -0,0 +1,428 @@
+//===- ConstantFPRangeTest.cpp - ConstantFPRange tests --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/ConstantFPRange.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +class ConstantFPRangeTest : public ::testing::Test { +protected: + static const fltSemantics &Sem; + static ConstantFPRange Full; + static ConstantFPRange Empty; + static ConstantFPRange Finite; + static ConstantFPRange One; + static ConstantFPRange PosZero; + static ConstantFPRange NegZero; + static ConstantFPRange Zero; + static ConstantFPRange PosInf; + static ConstantFPRange NegInf; + static ConstantFPRange Denormal; + static ConstantFPRange NaN; + static ConstantFPRange SNaN; + static ConstantFPRange QNaN; + static ConstantFPRange Some; + static ConstantFPRange SomePos; + static ConstantFPRange SomeNeg; +}; + +const fltSemantics &ConstantFPRangeTest::Sem = APFloat::IEEEdouble(); +ConstantFPRange ConstantFPRangeTest::Full = + ConstantFPRange::getFull(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::Empty = + ConstantFPRange::getEmpty(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::Finite = + ConstantFPRange::getFinite(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::One = ConstantFPRange(APFloat(1.0)); +ConstantFPRange ConstantFPRangeTest::PosZero = ConstantFPRange( + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false)); +ConstantFPRange ConstantFPRangeTest::NegZero = + ConstantFPRange(APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true)); +ConstantFPRange ConstantFPRangeTest::Zero = ConstantFPRange::getNonNaN( + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true), + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false)); +ConstantFPRange ConstantFPRangeTest::Denormal = + ConstantFPRange(APFloat::getSmallest(APFloat::IEEEdouble())); +ConstantFPRange ConstantFPRangeTest::PosInf = + ConstantFPRange(APFloat::getInf(APFloat::IEEEdouble(), /*Negative=*/false)); +ConstantFPRange ConstantFPRangeTest::NegInf = + ConstantFPRange(APFloat::getInf(APFloat::IEEEdouble(), /*Negative=*/true)); +ConstantFPRange ConstantFPRangeTest::NaN = ConstantFPRange::getNaNOnly( + APFloat::IEEEdouble(), /*MayBeQNaN=*/true, /*MayBeSNaN=*/true); +ConstantFPRange ConstantFPRangeTest::SNaN = + ConstantFPRange(APFloat::getSNaN(APFloat::IEEEdouble())); +ConstantFPRange ConstantFPRangeTest::QNaN = + ConstantFPRange(APFloat::getQNaN(APFloat::IEEEdouble())); +ConstantFPRange ConstantFPRangeTest::Some = + ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(3.0)); +ConstantFPRange ConstantFPRangeTest::SomePos = ConstantFPRange::getNonNaN( + APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false), APFloat(3.0)); +ConstantFPRange ConstantFPRangeTest::SomeNeg = ConstantFPRange::getNonNaN( + APFloat(-3.0), APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true)); + +static void strictNext(APFloat &V) { + // Note: nextUp(+/-0) is smallest. 
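+  // That is, next(/*nextDown=*/false) on -0 would skip +0 and go straight to
+  // the smallest positive denormal, so step -0 to +0 explicitly in order to
+  // visit both zeroes while enumerating.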
+ if (V.isNegZero()) + V = APFloat::getZero(V.getSemantics(), /*Negative=*/false); + else + V.next(/*nextDown=*/false); +} + +template +static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive, + bool MayBeQNaN, bool MayBeSNaN) { + const fltSemantics &Sem = APFloat::Float8E4M3(); + APFloat PosInf = APFloat::getInf(Sem, /*Negative=*/false); + APFloat NegInf = APFloat::getInf(Sem, /*Negative=*/true); + TestFn(ConstantFPRange(PosInf, NegInf, MayBeQNaN, MayBeSNaN)); + + if (!Exhaustive) { + SmallVector Values; + Values.push_back(APFloat::getInf(Sem, /*Negative=*/true)); + Values.push_back(APFloat::getLargest(Sem, /*Negative=*/true)); + unsigned BitWidth = APFloat::semanticsSizeInBits(Sem); + unsigned Exponents = APFloat::semanticsMaxExponent(Sem) - + APFloat::semanticsMinExponent(Sem) + 3; + unsigned MantissaBits = APFloat::semanticsPrecision(Sem) - 1; + // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent) + for (unsigned M = Exponents - 2; M != 0; --M) + Values.push_back( + APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits))); + Values.push_back(APFloat::getSmallest(Sem, /*Negative=*/true)); + Values.push_back(APFloat::getZero(Sem, /*Negative=*/true)); + size_t E = Values.size(); + for (size_t I = 1; I <= E; ++I) + Values.push_back(-Values[E - I]); + for (size_t I = 0; I != Values.size(); ++I) + for (size_t J = I; J != Values.size(); ++J) + TestFn(ConstantFPRange(Values[I], Values[J], MayBeQNaN, MayBeSNaN)); + return; + } + + auto Next = [&](APFloat &V) { + if (V.isPosInfinity()) + return false; + strictNext(V); + return true; + }; + + APFloat Lower = NegInf; + do { + APFloat Upper = Lower; + do { + TestFn(ConstantFPRange(Lower, Upper, MayBeQNaN, MayBeSNaN)); + } while (Next(Upper)); + } while (Next(Lower)); +} + +template +static void EnumerateConstantFPRanges(Fn TestFn, bool Exhaustive) { + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false, + /*MayBeSNaN=*/false); + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false, + /*MayBeSNaN=*/true); + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/false); + EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true, + /*MayBeSNaN=*/true); +} + +template +static void EnumerateTwoInterestingConstantFPRanges(Fn TestFn, + bool Exhaustive) { + EnumerateConstantFPRanges( + [&](const ConstantFPRange &CR1) { + EnumerateConstantFPRanges( + [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Exhaustive); + }, + Exhaustive); +} + +template +static void EnumerateValuesInConstantFPRange(const ConstantFPRange &CR, + Fn TestFn) { + const fltSemantics &Sem = CR.getSemantics(); + unsigned Bits = APFloat::semanticsSizeInBits(Sem); + assert(Bits < 32 && "Too many bits"); + for (unsigned I = 0, E = (1U << Bits) - 1; I != E; ++I) { + APFloat V(Sem, APInt(Bits, I)); + if (CR.contains(V)) + TestFn(V); + } +} + +TEST_F(ConstantFPRangeTest, Basics) { + EXPECT_TRUE(Full.isFullSet()); + EXPECT_FALSE(Full.isEmptySet()); + EXPECT_TRUE(Full.contains(APFloat::getNaN(Sem))); + EXPECT_TRUE(Full.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_TRUE(Full.contains(APFloat::getInf(Sem, /*Negative=*/true))); + EXPECT_TRUE(Full.contains(APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_TRUE(Full.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_TRUE(Full.contains(APFloat::getSmallest(Sem))); + EXPECT_TRUE(Full.contains(APFloat(2.0))); + EXPECT_TRUE(Full.contains(Full)); + EXPECT_TRUE(Full.contains(Empty)); + 
EXPECT_TRUE(Full.contains(Finite)); + EXPECT_TRUE(Full.contains(Zero)); + EXPECT_TRUE(Full.contains(Some)); + + EXPECT_FALSE(Empty.isFullSet()); + EXPECT_TRUE(Empty.isEmptySet()); + EXPECT_FALSE(Empty.contains(APFloat::getNaN(Sem))); + EXPECT_FALSE(Empty.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_FALSE(Empty.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_FALSE(Empty.contains(APFloat(2.0))); + EXPECT_TRUE(Empty.contains(Empty)); + + EXPECT_FALSE(Finite.isFullSet()); + EXPECT_FALSE(Finite.isEmptySet()); + EXPECT_FALSE(Finite.contains(APFloat::getNaN(Sem))); + EXPECT_FALSE(Finite.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_FALSE(Finite.contains(APFloat::getInf(Sem, /*Negative=*/true))); + EXPECT_TRUE(Finite.contains(APFloat::getLargest(Sem, /*Negative=*/false))); + EXPECT_TRUE(Finite.contains(APFloat::getLargest(Sem, /*Negative=*/true))); + EXPECT_TRUE(Finite.contains(Finite)); + EXPECT_TRUE(Finite.contains(Some)); + EXPECT_TRUE(Finite.contains(Denormal)); + EXPECT_TRUE(Finite.contains(Zero)); + EXPECT_FALSE(Finite.contains(PosInf)); + EXPECT_FALSE(Finite.contains(NaN)); + + EXPECT_TRUE(One.contains(APFloat(1.0))); + EXPECT_FALSE(One.contains(APFloat(1.1))); + + EXPECT_TRUE(PosZero.contains(APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_FALSE(PosZero.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_TRUE(NegZero.contains(APFloat::getZero(Sem, /*Negative=*/true))); + EXPECT_FALSE(NegZero.contains(APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_TRUE(Zero.contains(PosZero)); + EXPECT_TRUE(Zero.contains(NegZero)); + EXPECT_TRUE(Denormal.contains(APFloat::getSmallest(Sem))); + EXPECT_FALSE(Denormal.contains(APFloat::getSmallestNormalized(Sem))); + EXPECT_TRUE(PosInf.contains(APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_TRUE(NegInf.contains(APFloat::getInf(Sem, /*Negative=*/true))); + EXPECT_TRUE(NaN.contains(APFloat::getQNaN(Sem))); + EXPECT_TRUE(NaN.contains(APFloat::getSNaN(Sem))); + EXPECT_TRUE(NaN.contains(SNaN)); + EXPECT_TRUE(NaN.contains(QNaN)); + + EXPECT_TRUE(Some.contains(APFloat(3.0))); + EXPECT_TRUE(Some.contains(APFloat(-3.0))); + EXPECT_FALSE(Some.contains(APFloat(4.0))); + APFloat Next1(3.0); + Next1.next(/*nextDown=*/true); + EXPECT_TRUE(Some.contains(Next1)); + APFloat Next2(3.0); + Next2.next(/*nextDown=*/false); + EXPECT_FALSE(Some.contains(Next2)); + EXPECT_TRUE(Some.contains(Zero)); + EXPECT_TRUE(Some.contains(Some)); + EXPECT_TRUE(Some.contains(One)); + EXPECT_FALSE(Some.contains(NaN)); + EXPECT_FALSE(Some.contains(PosInf)); + EXPECT_TRUE(SomePos.contains(APFloat(3.0))); + EXPECT_FALSE(SomeNeg.contains(APFloat(3.0))); + EXPECT_TRUE(SomeNeg.contains(APFloat(-3.0))); + EXPECT_FALSE(SomePos.contains(APFloat(-3.0))); + EXPECT_TRUE(Some.contains(SomePos)); + EXPECT_TRUE(Some.contains(SomeNeg)); +} + +TEST_F(ConstantFPRangeTest, Equality) { + EXPECT_EQ(Full, Full); + EXPECT_EQ(Empty, Empty); + EXPECT_EQ(One, One); + EXPECT_EQ(Some, Some); + EXPECT_NE(Full, Empty); + EXPECT_NE(Zero, PosZero); + EXPECT_NE(One, NaN); + EXPECT_NE(Some, One); + EXPECT_NE(SNaN, QNaN); +} + +TEST_F(ConstantFPRangeTest, SingleElement) { + EXPECT_EQ(Full.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(Empty.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(Finite.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(Zero.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(NaN.getSingleElement(), static_cast(nullptr)); + EXPECT_EQ(SNaN.getSingleElement(), static_cast(nullptr)); + 
EXPECT_EQ(QNaN.getSingleElement(), static_cast(nullptr)); + + EXPECT_EQ(*One.getSingleElement(), APFloat(1.0)); + EXPECT_EQ(*PosZero.getSingleElement(), APFloat::getZero(Sem)); + EXPECT_EQ(*PosInf.getSingleElement(), APFloat::getInf(Sem)); + + EXPECT_FALSE(Full.isSingleElement()); + EXPECT_FALSE(Empty.isSingleElement()); + EXPECT_TRUE(One.isSingleElement()); + EXPECT_FALSE(Some.isSingleElement()); + EXPECT_FALSE(Zero.isSingleElement()); +} + +TEST_F(ConstantFPRangeTest, ExhaustivelyEnumerate) { + constexpr unsigned NNaNValues = (1 << 8) - 2 * ((1 << 3) - 1); + constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); + unsigned Count = 0; + EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, + /*Exhaustive=*/true); + EXPECT_EQ(Expected, Count); +} + +TEST_F(ConstantFPRangeTest, Enumerate) { + constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 4); + constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); + unsigned Count = 0; + EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, + /*Exhaustive=*/false); + EXPECT_EQ(Expected, Count); +} + +TEST_F(ConstantFPRangeTest, IntersectWith) { + EXPECT_EQ(Empty.intersectWith(Full), Empty); + EXPECT_EQ(Empty.intersectWith(Empty), Empty); + EXPECT_EQ(Empty.intersectWith(One), Empty); + EXPECT_EQ(Empty.intersectWith(Some), Empty); + EXPECT_EQ(Full.intersectWith(Full), Full); + EXPECT_EQ(Some.intersectWith(Some), Some); + EXPECT_EQ(Some.intersectWith(One), One); + EXPECT_EQ(Full.intersectWith(One), One); + EXPECT_EQ(Full.intersectWith(Some), Some); + EXPECT_EQ(Some.intersectWith(SomePos), SomePos); + EXPECT_EQ(Some.intersectWith(SomeNeg), SomeNeg); + EXPECT_EQ(NaN.intersectWith(Finite), Empty); + EXPECT_EQ(NaN.intersectWith(SNaN), SNaN); + EXPECT_EQ(NaN.intersectWith(QNaN), QNaN); + EXPECT_EQ(Finite.intersectWith(One), One); + EXPECT_EQ(Some.intersectWith(Zero), Zero); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(4.0)) + .intersectWith( + ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(6.0))), + ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(4.0))); + EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0)) + .intersectWith( + ConstantFPRange::getNonNaN(APFloat(5.0), APFloat(6.0))), + Empty); +} + +TEST_F(ConstantFPRangeTest, UnionWith) { + EXPECT_EQ(Empty.unionWith(Full), Full); + EXPECT_EQ(Empty.unionWith(Empty), Empty); + EXPECT_EQ(Empty.unionWith(One), One); + EXPECT_EQ(Empty.unionWith(Some), Some); + EXPECT_EQ(Full.unionWith(Full), Full); + EXPECT_EQ(Some.unionWith(Some), Some); + EXPECT_EQ(Some.unionWith(One), Some); + EXPECT_EQ(Full.unionWith(Some), Full); + EXPECT_EQ(Some.unionWith(SomePos), Some); + EXPECT_EQ(Some.unionWith(SomeNeg), Some); + EXPECT_EQ(Finite.unionWith(One), Finite); + EXPECT_EQ(Some.unionWith(Zero), Some); + EXPECT_EQ(Finite.unionWith(PosInf).unionWith(NegInf).unionWith(NaN), Full); + EXPECT_EQ(PosZero.unionWith(NegZero), Zero); + EXPECT_EQ(NaN.unionWith(SNaN), NaN); + EXPECT_EQ(NaN.unionWith(QNaN), NaN); + EXPECT_EQ(SNaN.unionWith(QNaN), NaN); + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(4.0)) + .unionWith(ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(6.0))), + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(6.0))); + EXPECT_EQ( + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0)) + .unionWith(ConstantFPRange::getNonNaN(APFloat(5.0), APFloat(6.0))), + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(6.0))); +} + +TEST_F(ConstantFPRangeTest, FPClassify) { + EXPECT_EQ(Empty.classify(), fcNone); + 
EXPECT_EQ(Full.classify(), fcAllFlags); + EXPECT_EQ(Finite.classify(), fcFinite); + EXPECT_EQ(Zero.classify(), fcZero); + EXPECT_EQ(NaN.classify(), fcNan); + EXPECT_EQ(SNaN.classify(), fcSNan); + EXPECT_EQ(QNaN.classify(), fcQNan); + EXPECT_EQ(One.classify(), fcPosNormal); + EXPECT_EQ(Some.classify(), fcFinite); + EXPECT_EQ(SomePos.classify(), fcPosFinite); + EXPECT_EQ(SomeNeg.classify(), fcNegFinite); + EXPECT_EQ(PosInf.classify(), fcPosInf); + EXPECT_EQ(NegInf.classify(), fcNegInf); + EXPECT_EQ(Finite.getSignBit(), std::nullopt); + EXPECT_EQ(PosZero.getSignBit(), false); + EXPECT_EQ(NegZero.getSignBit(), true); + EXPECT_EQ(SomePos.getSignBit(), false); + EXPECT_EQ(SomeNeg.getSignBit(), true); + + EnumerateConstantFPRanges( + [](const ConstantFPRange &CR) { + unsigned Mask = fcNone; + bool HasPos = false, HasNeg = false; + EnumerateValuesInConstantFPRange(CR, [&](const APFloat &V) { + Mask |= V.classify(); + if (V.isNegative()) + HasNeg = true; + else + HasPos = true; + }); + + std::optional SignBit = std::nullopt; + if (HasPos != HasNeg) + SignBit = HasNeg; + + EXPECT_EQ(SignBit, CR.getSignBit()) << CR; + EXPECT_EQ(Mask, CR.classify()) << CR; + }, + /*Exhaustive=*/true); +} + +TEST_F(ConstantFPRangeTest, Print) { + auto ToString = [](const ConstantFPRange &CR) { + std::string Str; + raw_string_ostream OS(Str); + CR.print(OS); + return Str; + }; + + EXPECT_EQ(ToString(Full), "full-set"); + EXPECT_EQ(ToString(Empty), "empty-set"); + EXPECT_EQ(ToString(NaN), "NaN"); + EXPECT_EQ(ToString(SNaN), "SNaN"); + EXPECT_EQ(ToString(QNaN), "QNaN"); + EXPECT_EQ(ToString(One), "[1, 1]"); + EXPECT_EQ(ToString(Some.unionWith(SNaN)), "[-3, 3] with SNaN"); +} + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG +TEST_F(ConstantFPRangeTest, NonCanonicalEmptySet) { + EXPECT_DEATH((void)(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0))), + "Non-canonical form"); +} +TEST_F(ConstantFPRangeTest, MismatchedSemantics) { + EXPECT_DEATH((void)(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f))), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.contains(APFloat(1.0f))), + "Should only use the same semantics"); + ConstantFPRange OneF32 = ConstantFPRange(APFloat(1.0f)); + EXPECT_DEATH((void)(One.contains(OneF32)), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.intersectWith(OneF32)), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.unionWith(OneF32)), + "Should only use the same semantics"); +} +#endif +#endif + +} // anonymous namespace diff --git a/llvm/unittests/MI/LiveIntervalTest.cpp b/llvm/unittests/MI/LiveIntervalTest.cpp index 7dcd82f3e7aa6..f910e8e1f2c8f 100644 --- a/llvm/unittests/MI/LiveIntervalTest.cpp +++ b/llvm/unittests/MI/LiveIntervalTest.cpp @@ -101,7 +101,9 @@ struct TestPassT : public TestPass { bool runOnMachineFunction(MachineFunction &MF) override { AnalysisType &A = getAnalysis(); T(MF, A); - EXPECT_EQ(MF.verify(this, /* Banner */ nullptr, /* AbortOnError */ false), + EXPECT_EQ(MF.verify(this, /* Banner=*/nullptr, + /*OS=*/nullptr, + /* AbortOnError=*/false), ShouldPass); return true; } diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 0464e27d5248a..2ec422f1a0984 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -1,4 +1,4 @@ -//===- unittest/Support/OptionMarshallingTest.cpp - OptParserEmitter tests ===// +//===- OptionMarshallingTest.cpp - OptionParserEmitter tests -================// // // Part of 
the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/unittests/SandboxIR/CMakeLists.txt b/llvm/unittests/SandboxIR/CMakeLists.txt index a228637b062a4..2ab284a511fca 100644 --- a/llvm/unittests/SandboxIR/CMakeLists.txt +++ b/llvm/unittests/SandboxIR/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS AsmParser SandboxIR Core + Analysis ) add_llvm_unittest(SandboxIRTests @@ -9,4 +10,5 @@ add_llvm_unittest(SandboxIRTests SandboxIRTest.cpp TrackerTest.cpp TypesTest.cpp + UtilsTest.cpp ) diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 1fcc9cbea152c..42df09609b675 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1769,6 +1769,7 @@ define void @foo(i8 %v1, ptr %ptr) { store volatile i8 %ld0, ptr %ptr %atomicrmw = atomicrmw add ptr %ptr, i8 %v1 acquire %udiv = udiv i8 %ld0, %v1 + %urem = urem i8 %ld0, %v1 call void @foo() ret void } @@ -1861,6 +1862,18 @@ define void @foo(i8 %v1, ptr %ptr) { for (auto &LLVMI : *LLVMBB1) { auto &I = cast(*Ctx.getValue(&LLVMI)); + // Check isTerminator(). + EXPECT_EQ(LLVMI.isTerminator(), I.isTerminator()); + // Check isUnaryOp(). + EXPECT_EQ(LLVMI.isUnaryOp(), I.isUnaryOp()); + // Check isBinaryOp(). + EXPECT_EQ(LLVMI.isBinaryOp(), I.isBinaryOp()); + // Check isIntDivRem(). + EXPECT_EQ(LLVMI.isIntDivRem(), I.isIntDivRem()); + // Check isShift(). + EXPECT_EQ(LLVMI.isShift(), I.isShift()); + // Check isCast(). + EXPECT_EQ(LLVMI.isCast(), I.isCast()); // Check isAssociative(). EXPECT_EQ(LLVMI.isAssociative(), I.isAssociative()); // Check isCommutative(). diff --git a/llvm/unittests/SandboxIR/UtilsTest.cpp b/llvm/unittests/SandboxIR/UtilsTest.cpp new file mode 100644 index 0000000000000..ded3edf1206a4 --- /dev/null +++ b/llvm/unittests/SandboxIR/UtilsTest.cpp @@ -0,0 +1,56 @@ +//===- UtilsTest.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Utils.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct UtilsTest : public testing::Test { + LLVMContext C; + std::unique_ptr M; + + void parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + M = parseAssemblyString(IR, Err, C); + if (!M) + Err.print("UtilsTest", errs()); + } + BasicBlock *getBasicBlockByName(Function &F, StringRef Name) { + for (BasicBlock &BB : F) + if (BB.getName() == Name) + return &BB; + llvm_unreachable("Expected to find basic block!"); + } +}; + +TEST_F(UtilsTest, getMemoryLocation) { + parseIR(C, R"IR( +define void @foo(ptr %arg0) { + %ld = load i8, ptr %arg0 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + auto *LLVMBB = &*LLVMF->begin(); + auto *LLVMLd = cast(&*LLVMBB->begin()); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto *Ld = cast(&*BB->begin()); + EXPECT_EQ(sandboxir::Utils::memoryLocationGetOrNone(Ld), + MemoryLocation::getOrNone(LLVMLd)); +} diff --git a/llvm/unittests/Support/TimerTest.cpp b/llvm/unittests/Support/TimerTest.cpp index 09545eb6939ae..5686b394e16cd 100644 --- a/llvm/unittests/Support/TimerTest.cpp +++ b/llvm/unittests/Support/TimerTest.cpp @@ -27,8 +27,13 @@ void SleepMS() { struct timespec Interval; Interval.tv_sec = 0; Interval.tv_nsec = 1000000; +#if defined(__MVS__) + long Microseconds = (Interval.tv_nsec + 999) / 1000; + usleep(Microseconds); +#else nanosleep(&Interval, nullptr); #endif +#endif } TEST(Timer, Additivity) { diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index a35edd6168529..fbeff37d26a35 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -198,6 +198,26 @@ TEST(raw_ostreamTest, Indent) { EXPECT_EQ(Spaces(10), printToString(Scaled)); Scaled -= 1; EXPECT_EQ(Spaces(8), printToString(Scaled)); + + // Operators. 
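+  // indent is a lightweight value type: the post-increment/decrement forms
+  // below return the previous indentation level, while the pre-forms return
+  // the updated one, mirroring the built-in integer operators.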
+ Indent = 10; + EXPECT_EQ(Spaces(10), printToString(Indent)); + + indent Temp = Indent++; + EXPECT_EQ(Spaces(11), printToString(Indent)); + EXPECT_EQ(Spaces(10), printToString(Temp)); + + Temp = Indent--; + EXPECT_EQ(Spaces(10), printToString(Indent)); + EXPECT_EQ(Spaces(11), printToString(Temp)); + + Temp = ++Indent; + EXPECT_EQ(Spaces(11), printToString(Indent)); + EXPECT_EQ(Spaces(11), printToString(Temp)); + + Temp = --Indent; + EXPECT_EQ(Spaces(10), printToString(Indent)); + EXPECT_EQ(Spaces(10), printToString(Temp)); } TEST(raw_ostreamTest, FormatHex) { diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 6662421eb26d9..33944b64dc157 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -980,6 +980,7 @@ R"(All available -march extensions for RISC-V za64rs 1.0 zaamo 1.0 zabha 1.0 + zacas 1.0 zalrsc 1.0 zama16b 1.0 zawrs 1.0 @@ -1116,7 +1117,6 @@ R"(All available -march extensions for RISC-V Experimental extensions zicfilp 1.0 This is a long dummy description zicfiss 1.0 - zacas 1.0 zalasr 0.1 zvbc32e 0.7 zvkgs 0.7 diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index abebb98761d06..ba1e4aa01b48d 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -59,8 +59,8 @@ add_tablegen(llvm-tblgen LLVM InstrInfoEmitter.cpp IntrinsicEmitter.cpp MacroFusionPredicatorEmitter.cpp - OptParserEmitter.cpp - OptRSTEmitter.cpp + OptionParserEmitter.cpp + OptionRSTEmitter.cpp PseudoLoweringEmitter.cpp RegisterBankEmitter.cpp RegisterInfoEmitter.cpp diff --git a/llvm/utils/TableGen/Common/PredicateExpander.cpp b/llvm/utils/TableGen/Common/PredicateExpander.cpp index 2afaa8cc21aa6..314e563ba90bb 100644 --- a/llvm/utils/TableGen/Common/PredicateExpander.cpp +++ b/llvm/utils/TableGen/Common/PredicateExpander.cpp @@ -153,10 +153,9 @@ void PredicateExpander::expandCheckOpcode(raw_ostream &OS, } OS << '('; - increaseIndentLevel(); + ++Indent; for (const Record *Rec : Opcodes) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; if (!First) OS << (shouldNegate() ? "&& " : "|| "); @@ -164,10 +163,8 @@ void PredicateExpander::expandCheckOpcode(raw_ostream &OS, First = false; } - OS << '\n'; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << ')'; + --Indent; + OS << '\n' << Indent << ')'; } void PredicateExpander::expandCheckPseudo(raw_ostream &OS, @@ -187,22 +184,19 @@ void PredicateExpander::expandPredicateSequence( // Okay, there is more than one predicate in the set. bool First = true; OS << (shouldNegate() ? "!(" : "("); - increaseIndentLevel(); + ++Indent; bool OldValue = shouldNegate(); setNegatePredicate(false); for (const Record *Rec : Sequence) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; if (!First) OS << (IsCheckAll ? 
"&& " : "|| "); expandPredicate(OS, Rec); First = false; } - OS << '\n'; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << ')'; + --Indent; + OS << '\n' << Indent << ')'; setNegatePredicate(OldValue); } @@ -269,15 +263,14 @@ void PredicateExpander::expandReturnStatement(raw_ostream &OS, void PredicateExpander::expandOpcodeSwitchCase(raw_ostream &OS, const Record *Rec) { for (const Record *Opcode : Rec->getValueAsListOfDefs("Opcodes")) { - OS.indent(getIndentLevel() * 2); - OS << "case " << Opcode->getValueAsString("Namespace") + OS << Indent << "case " << Opcode->getValueAsString("Namespace") << "::" << Opcode->getName() << ":\n"; } - increaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + ++Indent; + OS << Indent; expandStatement(OS, Rec->getValueAsDef("CaseStmt")); - decreaseIndentLevel(); + --Indent; } void PredicateExpander::expandOpcodeSwitchStatement( @@ -292,17 +285,12 @@ void PredicateExpander::expandOpcodeSwitchStatement( } // Expand the default case. - SS.indent(getIndentLevel() * 2); - SS << "default:\n"; + SS << Indent << "default:\n"; - increaseIndentLevel(); - SS.indent(getIndentLevel() * 2); + ++Indent; + SS << Indent; expandStatement(SS, Default); - decreaseIndentLevel(); - SS << '\n'; - - SS.indent(getIndentLevel() * 2); - SS << "} // end of switch-stmt"; + SS << '\n' << Indent << "} // end of switch-stmt"; OS << Buffer; } @@ -436,8 +424,7 @@ void STIPredicateExpander::expandHeader(raw_ostream &OS, const Record *Rec = Fn.getDeclaration(); StringRef FunctionName = Rec->getValueAsString("Name"); - OS.indent(getIndentLevel() * 2); - OS << "bool "; + OS << Indent << "bool "; if (shouldExpandDefinition()) OS << getClassPrefix() << "::"; OS << FunctionName << "("; @@ -463,26 +450,22 @@ void STIPredicateExpander::expandPrologue(raw_ostream &OS, bool UpdatesOpcodeMask = Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); - increaseIndentLevel(); - unsigned IndentLevel = getIndentLevel(); + ++Indent; for (const Record *Delegate : Fn.getDeclaration()->getValueAsListOfDefs("Delegates")) { - OS.indent(IndentLevel * 2); - OS << "if (" << Delegate->getValueAsString("Name") << "(MI"; + OS << Indent << "if (" << Delegate->getValueAsString("Name") << "(MI"; if (UpdatesOpcodeMask) OS << ", Mask"; if (shouldExpandForMC()) OS << ", ProcessorID"; OS << "))\n"; - OS.indent((1 + IndentLevel) * 2); - OS << "return true;\n\n"; + OS << Indent + 1 << "return true;\n\n"; } if (shouldExpandForMC()) return; - OS.indent(IndentLevel * 2); - OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; + OS << Indent << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; } void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, @@ -497,8 +480,7 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, continue; if (FirstProcID) { - OS.indent(getIndentLevel() * 2); - OS << "if (ProcessorID == " << I; + OS << Indent << "if (ProcessorID == " << I; } else { OS << " || ProcessorID == " << I; } @@ -507,21 +489,20 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, OS << ") {\n"; - increaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + ++Indent; + OS << Indent; if (ShouldUpdateOpcodeMask) { if (PI.OperandMask.isZero()) OS << "Mask.clearAllBits();\n"; else OS << "Mask = " << PI.OperandMask << ";\n"; - OS.indent(getIndentLevel() * 2); + OS << Indent; } OS << "return "; expandPredicate(OS, PI.Predicate); OS << ";\n"; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << "}\n"; + --Indent; + OS << Indent << "}\n"; } } @@ -530,46 
+511,38 @@ void STIPredicateExpander::expandBody(raw_ostream &OS, bool UpdatesOpcodeMask = Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); - unsigned IndentLevel = getIndentLevel(); - OS.indent(IndentLevel * 2); - OS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n"; - OS.indent(IndentLevel * 2); - OS << "default:\n"; - OS.indent(IndentLevel * 2); - OS << " break;"; + OS << Indent << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n"; + OS << Indent << "default:\n"; + OS << Indent << " break;"; for (const OpcodeGroup &Group : Fn.getGroups()) { for (const Record *Opcode : Group.getOpcodes()) { - OS << '\n'; - OS.indent(IndentLevel * 2); - OS << "case " << getTargetName() << "::" << Opcode->getName() << ":"; + OS << '\n' + << Indent << "case " << getTargetName() << "::" << Opcode->getName() + << ":"; } OS << '\n'; - increaseIndentLevel(); + ++Indent; expandOpcodeGroup(OS, Group, UpdatesOpcodeMask); - OS.indent(getIndentLevel() * 2); - OS << "break;\n"; - decreaseIndentLevel(); + OS << Indent << "break;\n"; + --Indent; } - OS.indent(IndentLevel * 2); - OS << "}\n"; + OS << Indent << "}\n"; } void STIPredicateExpander::expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; OS << "return "; expandPredicate(OS, Fn.getDefaultReturnPredicate()); OS << ";\n"; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + --Indent; StringRef FunctionName = Fn.getDeclaration()->getValueAsString("Name"); - OS << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; + OS << Indent << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; } void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS, diff --git a/llvm/utils/TableGen/Common/PredicateExpander.h b/llvm/utils/TableGen/Common/PredicateExpander.h index c0cd69e3cb1f8..0c3a8718a473f 100644 --- a/llvm/utils/TableGen/Common/PredicateExpander.h +++ b/llvm/utils/TableGen/Common/PredicateExpander.h @@ -18,39 +18,38 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { -class raw_ostream; class Record; class PredicateExpander { bool EmitCallsByRef; bool NegatePredicate; bool ExpandForMC; - unsigned IndentLevel; StringRef TargetName; PredicateExpander(const PredicateExpander &) = delete; PredicateExpander &operator=(const PredicateExpander &) = delete; +protected: + indent Indent; + public: - PredicateExpander(StringRef Target) + explicit PredicateExpander(StringRef Target, unsigned Indent = 1) : EmitCallsByRef(true), NegatePredicate(false), ExpandForMC(false), - IndentLevel(1U), TargetName(Target) {} + TargetName(Target), Indent(Indent, 2) {} bool isByRef() const { return EmitCallsByRef; } bool shouldNegate() const { return NegatePredicate; } bool shouldExpandForMC() const { return ExpandForMC; } - unsigned getIndentLevel() const { return IndentLevel; } + indent &getIndent() { return Indent; } StringRef getTargetName() const { return TargetName; } void setByRef(bool Value) { EmitCallsByRef = Value; } void flipNegatePredicate() { NegatePredicate = !NegatePredicate; } void setNegatePredicate(bool Value) { NegatePredicate = Value; } void setExpandForMC(bool Value) { ExpandForMC = Value; } - void setIndentLevel(unsigned Level) { IndentLevel = Level; } - void increaseIndentLevel() { ++IndentLevel; } - void decreaseIndentLevel() { --IndentLevel; } void expandTrue(raw_ostream &OS); void expandFalse(raw_ostream &OS); @@ -116,8 +115,8 @@ class STIPredicateExpander : 
public PredicateExpander { void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn); public: - STIPredicateExpander(StringRef Target) - : PredicateExpander(Target), ExpandDefinition(false) {} + explicit STIPredicateExpander(StringRef Target, unsigned Indent = 1) + : PredicateExpander(Target, Indent), ExpandDefinition(false) {} bool shouldExpandDefinition() const { return ExpandDefinition; } StringRef getClassPrefix() const { return ClassPrefix; } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index cc5ef49385bb8..46605095ba85f 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -711,7 +711,7 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, OS << "bool " << Rec->getValueAsString("FunctionName"); OS << "(const MCInst &MI) {\n"; - OS.indent(PE.getIndentLevel() * 2); + OS << PE.getIndent(); PE.expandStatement(OS, Rec->getValueAsDef("Body")); OS << "\n}\n\n"; } @@ -914,7 +914,7 @@ void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS, } OS << " {\n"; - OS.indent(PE.getIndentLevel() * 2); + OS << PE.getIndent(); PE.expandStatement(OS, Rec->getValueAsDef("Body")); OS << "\n}\n\n"; } diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 51c2e9a12e00c..efa067e60de43 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -276,12 +276,10 @@ using TypeSigTy = SmallVector; static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) { TypeSigTy TypeSig; const Record *TypeInfo = Int.TheDef->getValueAsDef("TypeInfo"); - const ListInit *OuterList = TypeInfo->getValueAsListInit("TypeSig"); + const ListInit *TypeList = TypeInfo->getValueAsListInit("TypeSig"); - for (const auto *Outer : OuterList->getValues()) { - for (const auto *Inner : cast(Outer)->getValues()) - TypeSig.emplace_back(cast(Inner)->getValue()); - } + for (const auto *TypeListEntry : TypeList->getValues()) + TypeSig.emplace_back(cast(TypeListEntry)->getValue()); return TypeSig; } diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp index c4f238b67476a..6ca2fea41230b 100644 --- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -160,7 +160,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(const Record *Predicate, OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; OS.indent(4) << "if ("; PE.setNegatePredicate(true); - PE.setIndentLevel(3); + PE.getIndent() = 3; PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); OS << ")\n"; OS.indent(4) << " return false;\n"; @@ -181,7 +181,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(const Record *Predicate, OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; OS.indent(4) << "if ("; PE.setNegatePredicate(true); - PE.setIndentLevel(3); + PE.getIndent() = 3; PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); OS << ")\n"; OS.indent(4) << " return false;\n"; diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp similarity index 98% rename from llvm/utils/TableGen/OptParserEmitter.cpp rename to llvm/utils/TableGen/OptionParserEmitter.cpp index 79cbf51514ae5..5ae6f773a3c60 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -1,4 +1,4 @@ -//===- OptParserEmitter.cpp - Table Driven Command Line 
Parsing -----------===//
+//===- OptionParserEmitter.cpp - Table Driven Command Line Option Parsing -===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -247,10 +247,10 @@ static void EmitHelpTextsForVariants(
   OS << " }})";
 }
 
-/// OptParserEmitter - This tablegen backend takes an input .td file
+/// OptionParserEmitter - This tablegen backend takes an input .td file
 /// describing a list of options and emits a data structure for parsing and
 /// working with those options when given an input command line.
-static void EmitOptParser(const RecordKeeper &Records, raw_ostream &OS) {
+static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
   // Get the option groups and options.
   ArrayRef<const Record *> Groups =
       Records.getAllDerivedDefinitions("OptionGroup");
@@ -572,5 +572,5 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "\n";
 }
 
-static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptParser,
+static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptionParser,
                                 "Generate option definitions");
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptionRSTEmitter.cpp
similarity index 89%
rename from llvm/utils/TableGen/OptRSTEmitter.cpp
rename to llvm/utils/TableGen/OptionRSTEmitter.cpp
index 16125198a7c38..b798896a80963 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptionRSTEmitter.cpp
@@ -1,4 +1,4 @@
-//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===//
+//===- OptionRSTEmitter.cpp - Table Driven Command Line Option Parsing ----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,9 +14,9 @@
 
 using namespace llvm;
 
-/// OptParserEmitter - This tablegen backend takes an input .td file
-/// describing a list of options and emits a RST man page.
-static void EmitOptRST(const RecordKeeper &Records, raw_ostream &OS) {
+/// This tablegen backend takes an input .td file describing a list of options
+/// and emits an RST man page.
+static void EmitOptionRST(const RecordKeeper &Records, raw_ostream &OS) {
   llvm::StringMap<std::vector<const Record *>> OptionsByGroup;
   std::vector<const Record *> OptionsWithoutGroup;
 
@@ -97,5 +97,5 @@
   }
 }
 
-static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptRST,
+static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptionRST,
                                 "Generate option RST");
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 78d80ff82d6a4..d21e19c060afc 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1576,13 +1576,13 @@ static void emitPredicates(const CodeGenSchedTransition &T,
   unsigned NumNonTruePreds = T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate);
 
-  SS.indent(PE.getIndentLevel() * 2);
+  SS << PE.getIndent();
 
   if (NumNonTruePreds) {
     bool FirstNonTruePredicate = true;
     SS << "if (";
 
-    PE.setIndentLevel(PE.getIndentLevel() + 2);
+    PE.getIndent() += 2;
 
     for (const Record *Rec : T.PredTerm) {
       // Skip predicates that evaluate to "true".
@@ -1593,7 +1593,7 @@ static void emitPredicates(const CodeGenSchedTransition &T, FirstNonTruePredicate = false; } else { SS << "\n"; - SS.indent(PE.getIndentLevel() * 2); + SS << PE.getIndent(); SS << "&& "; } @@ -1610,9 +1610,9 @@ static void emitPredicates(const CodeGenSchedTransition &T, } SS << ")\n"; // end of if-stmt - PE.decreaseIndentLevel(); - SS.indent(PE.getIndentLevel() * 2); - PE.decreaseIndentLevel(); + --PE.getIndent(); + SS << PE.getIndent(); + --PE.getIndent(); } SS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n'; @@ -1736,7 +1736,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl( FinalT = &T; continue; } - PE.setIndentLevel(3); + PE.getIndent() = 3; emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS); } if (FinalT) @@ -1780,11 +1780,10 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName, << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n" << "} // " << ClassName << "::resolveVariantSchedClass\n\n"; - STIPredicateExpander PE(Target); + STIPredicateExpander PE(Target, /*Indent=*/0); PE.setClassPrefix(ClassName); PE.setExpandDefinition(true); PE.setByRef(false); - PE.setIndentLevel(0); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) PE.expandSTIPredicate(OS, Fn); @@ -1962,7 +1961,7 @@ void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n"; OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n"; - STIPredicateExpander PE(Target); + STIPredicateExpander PE(Target, /*Indent=*/0); PE.setExpandForMC(true); PE.setByRef(true); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) @@ -1976,7 +1975,6 @@ void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { std::string ClassPrefix = Target + "MCInstrAnalysis"; PE.setExpandDefinition(true); PE.setClassPrefix(ClassPrefix); - PE.setIndentLevel(0); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) PE.expandSTIPredicate(OS, Fn); diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn index 27f95bb5a49f1..0b0f62721d374 100644 --- a/llvm/utils/gn/build/BUILD.gn +++ b/llvm/utils/gn/build/BUILD.gn @@ -186,6 +186,7 @@ config("compiler_defaults") { if (!is_clang) { # expand __VA_ARGS__ in "OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__)" cflags += [ "/Zc:preprocessor" ] + # cl.exe doesn't set __cplusplus correctly by default. # clang-cl gets it right by default, so don't needlessly add the flag there. cflags_cc += [ "/Zc:__cplusplus" ] diff --git a/llvm/utils/gn/build/toolchain/target_flags.gni b/llvm/utils/gn/build/toolchain/target_flags.gni index cbfa22966b48f..50d31a3da85fc 100644 --- a/llvm/utils/gn/build/toolchain/target_flags.gni +++ b/llvm/utils/gn/build/toolchain/target_flags.gni @@ -45,9 +45,10 @@ if (current_os == "android") { target_flags += [ "-isysroot", rebase_path(mac_sdk_path, root_build_dir), + # TODO(lgrey): We should be getting this from `compiler_defaults`. Why # aren't we? 
- "-mmacos-version-min=$mac_deployment_target", + "-mmacos-version-min=$mac_deployment_target", ] } } else if (current_os == "baremetal") { diff --git a/llvm/utils/gn/secondary/BUILD.gn b/llvm/utils/gn/secondary/BUILD.gn index a17a2fdb7a3ca..7f6b4cb43239f 100644 --- a/llvm/utils/gn/secondary/BUILD.gn +++ b/llvm/utils/gn/secondary/BUILD.gn @@ -21,12 +21,12 @@ group("default") { "//libcxxabi", ] } - if (current_os == "linux" || current_os == "win" || current_os=="mac") { + if (current_os == "linux" || current_os == "win" || current_os == "mac") { deps += [ "//compiler-rt/test/asan" ] } if (current_os == "linux" || current_os == "mac") { - deps += [ "//compiler-rt/test/lsan"] + deps += [ "//compiler-rt/test/lsan" ] } if (current_os == "linux" || current_os == "android") { diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn index b224df093c774..47dd70e629fb3 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn @@ -1,7 +1,7 @@ copy("assets") { sources = [ - "../assets/index.js", "../assets/clang-doc-default-stylesheet.css", + "../assets/index.js", ] outputs = [ "$root_build_dir/share/clang-doc/{{source_file_part}}" ] } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index a116c0abe0b2c..c6b45efef2990 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -56,9 +56,9 @@ static_library("clangd") { "//clang/lib/Serialization", "//clang/lib/Tooling", "//clang/lib/Tooling/Core", + "//clang/lib/Tooling/DependencyScanning", "//clang/lib/Tooling/Inclusions", "//clang/lib/Tooling/Inclusions/Stdlib", - "//clang/lib/Tooling/DependencyScanning", "//clang/lib/Tooling/Refactoring", "//clang/lib/Tooling/Syntax", "//llvm/lib/Support", diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 1d5b8025a12ac..97610d4c95749 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -124,13 +124,13 @@ write_lit_config("lit_site_cfg") { "CMAKE_LIBRARY_OUTPUT_DIRECTORY=" + rebase_path("$root_out_dir/bin", dir), "LLVM_LIT_ERRC_MESSAGES=no such file or directory;is a directory;" + "invalid argument;permission denied", - "PERL_EXECUTABLE=" + "PERL_EXECUTABLE=", ] } else { extra_values += [ "CMAKE_LIBRARY_OUTPUT_DIRECTORY=" + rebase_path("$root_out_dir/lib", dir), "LLVM_LIT_ERRC_MESSAGES=", - "PERL_EXECUTABLE=/usr/bin/perl" + "PERL_EXECUTABLE=/usr/bin/perl", ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn index e27659457474f..b8bf438bfdb48 100644 --- a/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn @@ -7,7 +7,7 @@ unittest("InstallAPITests") { "//llvm/lib/Testing/Support", ] sources = [ - "HeaderFileTest.cpp", "FileListTest.cpp", + "HeaderFileTest.cpp", ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn index 450c419d25fe8..e398119fb38a9 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn @@ -167,9 +167,9 
@@ source_set("sources") { "sanitizer_vector.h", "sanitizer_win.cpp", "sanitizer_win.h", + "sanitizer_win_defs.h", "sanitizer_win_interception.cpp", "sanitizer_win_interception.h", - "sanitizer_win_defs.h", "sanitizer_win_thunk_interception.h", ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn index 59ed1d1480967..7bdf9c2b994c9 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn @@ -37,8 +37,8 @@ if (current_toolchain != host_toolchain) { ":lit_site_cfg", "//compiler-rt/include($host_toolchain)", "//compiler-rt/lib/cfi:ignorelist($host_toolchain)", - "//compiler-rt/lib/hwasan:hwasan_shared", "//compiler-rt/lib/hwasan:hwasan_preinit", + "//compiler-rt/lib/hwasan:hwasan_shared", "//compiler-rt/test:lit_common_configured", "//llvm/utils/FileCheck($host_toolchain)", "//llvm/utils/llvm-lit($host_toolchain)", diff --git a/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn index 4fb375f06caae..7dc69af57124c 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn @@ -7,11 +7,13 @@ import("//llvm/version.gni") write_cmake_config("asan_mode_cfg") { input = "lit.site.cfg.py.in" - output = "$target_gen_dir/${crt_current_target_arch}AsanConfig/lit.site.cfg.py" + output = + "$target_gen_dir/${crt_current_target_arch}AsanConfig/lit.site.cfg.py" values = [ "LSAN_LIT_SOURCE_DIR=" + rebase_path("."), "LSAN_TEST_CONFIG_SUFFIX=$crt_current_target_suffix", "LSAN_TEST_TARGET_CFLAGS=$target_flags_string", + # TODO(lgrey): Support standalone mode "LSAN_LIT_TEST_MODE=AddressSanitizer", "LSAN_TEST_TARGET_ARCH=$crt_current_target_arch", @@ -59,9 +61,7 @@ if (supported_toolchains != []) { test_dir = rebase_path( get_label_info(":lit_site_cfg($toolchain)", "target_gen_dir"), root_build_dir) - args += [ - test_dir + "/${crt_current_target_arch}AsanConfig", - ] + args += [ test_dir + "/${crt_current_target_arch}AsanConfig" ] } outputs = [ "$target_gen_dir/run-lit" ] # Non-existing, so that ninja runs # it each time. diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index a94674a61873d..29e864957a5e1 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -317,13 +317,13 @@ if (libcxx_enable_experimental) { sources = [ "experimental/keep.cpp" ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ + # TODO TZDB The exception could be moved in chrono once the TZDB library + # is no longer experimental. + "experimental/chrono_exception.cpp", "experimental/include/tzdb/time_zone_private.h", "experimental/include/tzdb/types_private.h", "experimental/include/tzdb/tzdb_list_private.h", "experimental/include/tzdb/tzdb_private.h", - # TODO TZDB The exception could be moved in chrono once the TZDB library - # is no longer experimental. 
- "experimental/chrono_exception.cpp", "experimental/time_zone.cpp", "experimental/tzdb.cpp", "experimental/tzdb_list.cpp", diff --git a/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn b/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn index d6af6a1c73792..6eb82ea61a1b0 100644 --- a/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn @@ -3,8 +3,8 @@ import("//third-party/unittest/unittest.gni") unittest("LLDAsLibAllTests") { configs += [ "//llvm/utils/gn/build:lld_code" ] deps = [ - "//lld/Common", "//lld/COFF", + "//lld/Common", "//lld/ELF", "//lld/MachO", "//lld/MinGW", diff --git a/llvm/utils/gn/secondary/lld/unittests/BUILD.gn b/llvm/utils/gn/secondary/lld/unittests/BUILD.gn index c909670f4b1ba..6faaa12faad0d 100644 --- a/llvm/utils/gn/secondary/lld/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/unittests/BUILD.gn @@ -5,4 +5,3 @@ group("unittests") { ] testonly = true } - diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn index e903d16e338c9..369b24f97d7b1 100644 --- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn @@ -164,8 +164,8 @@ group("test") { ":lit_unit_site_cfg", "//clang/tools/driver:symlinks", "//lld/tools/lld:symlinks", - "//lldb/tools/lldb-dap", "//lldb/tools/driver:lldb", + "//lldb/tools/lldb-dap", # XXX lldb-instr, darwin-debug, etc "//lldb/tools/lldb-server", diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn index a71dfa518b1df..455a8265fce87 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn @@ -23,8 +23,8 @@ tablegen("RISCVTargetParserDef") { group("gen") { deps = [ - ":ARMTargetParserDef", ":AArch64TargetParserDef", + ":ARMTargetParserDef", ":RISCVTargetParserDef", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn index 5df31c33a3ad0..04f819d36d581 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn @@ -10,4 +10,3 @@ static_library("CodeGenTypes") { ] sources = [ "LowLevelType.cpp" ] } - diff --git a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn index 74c1362c697b9..803dd867199de 100644 --- a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn @@ -2,7 +2,7 @@ static_library("BTF") { output_name = "LLVMDebugInfoBTF" deps = [ "//llvm/lib/Support" ] sources = [ - "BTFParser.cpp", "BTFContext.cpp", + "BTFParser.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index 0f6e345b9d175..3fecf9477ee76 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -21,6 +21,7 @@ static_library("IR") { "BasicBlock.cpp", "BuiltinGCs.cpp", "Comdat.cpp", + "ConstantFPRange.cpp", "ConstantFold.cpp", "ConstantRange.cpp", "ConstantRangeList.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn index a4a6889dcb4a5..1fba8640a2a2b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn @@ -12,7 +12,5 @@ static_library("Utils") { "//llvm/lib/Target/WebAssembly/TargetInfo", ] include_dirs = [ ".." ] - sources = [ - "WebAssemblyTypeUtilities.cpp", - ] + sources = [ "WebAssemblyTypeUtilities.cpp" ] } diff --git a/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn index a968760e1c2a3..8756ee512d01b 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn @@ -6,8 +6,8 @@ executable("llc") { "//llvm/lib/CodeGen/MIRParser", "//llvm/lib/CodeGen/SelectionDAG", "//llvm/lib/IR", - "//llvm/lib/IRReader", "//llvm/lib/IRPrinter", + "//llvm/lib/IRReader", "//llvm/lib/MC", "//llvm/lib/Passes", "//llvm/lib/Support", diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn index 01f8d0f134dd2..49cccaa5b215c 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn @@ -1,7 +1,7 @@ import("//llvm/tools/binutils_symlinks.gni") +import("//llvm/utils/TableGen/tablegen.gni") import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") -import("//llvm/utils/TableGen/tablegen.gni") tablegen("Opts") { visibility = [ ":llvm-dwp" ] diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn index c974cae267371..034201adf779e 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn @@ -1,7 +1,7 @@ import("//llvm/tools/cctools_symlinks.gni") +import("//llvm/utils/TableGen/tablegen.gni") import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") -import("//llvm/utils/TableGen/tablegen.gni") tablegen("Opts") { visibility = [ ":llvm-libtool-darwin" ] diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn index b094f0e59bcce..9e3fb96861dbd 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn @@ -1,5 +1,5 @@ -import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/TableGen/tablegen.gni") +import("//llvm/utils/gn/build/driver_executable.gni") tablegen("Opts") { visibility = [ ":llvm-ml" ] diff --git a/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn index ff0fd700c911a..9057072f3c095 100644 --- a/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn @@ -1,5 +1,5 @@ -import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/TableGen/tablegen.gni") +import("//llvm/utils/gn/build/driver_executable.gni") tablegen("Opts") { visibility = [ ":sancov" ] diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index ba897a679db46..b19d54d7ed92f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -17,6 +17,7 @@ unittest("IRTests") { "BasicBlockDbgInfoTest.cpp", "BasicBlockTest.cpp", "CFGBuilder.cpp", + "ConstantFPRangeTest.cpp", "ConstantRangeListTest.cpp", "ConstantRangeTest.cpp", "ConstantsTest.cpp", diff --git 
a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn index 27ff75b18f431..c9c59acda22ac 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn @@ -5,10 +5,10 @@ unittest("InstrumentationTests") { "//llvm/lib/Analysis", "//llvm/lib/AsmParser", "//llvm/lib/IR", - "//llvm/lib/Transforms/Instrumentation", "//llvm/lib/Passes", "//llvm/lib/Support", "//llvm/lib/Testing/Support", + "//llvm/lib/Transforms/Instrumentation", ] sources = [ "PGOInstrumentationTest.cpp" ] } diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index 2e11d25767cd0..ba52a97f39d85 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -56,17 +56,17 @@ executable("llvm-tblgen") { "InstrDocsEmitter.cpp", "InstrInfoEmitter.cpp", "MacroFusionPredicatorEmitter.cpp", - "OptParserEmitter.cpp", - "OptRSTEmitter.cpp", + "OptionParserEmitter.cpp", + "OptionRSTEmitter.cpp", "PseudoLoweringEmitter.cpp", "RegisterBankEmitter.cpp", "RegisterInfoEmitter.cpp", "SearchableTableEmitter.cpp", "SubtargetEmitter.cpp", "WebAssemblyDisassemblerEmitter.cpp", - "X86InstrMappingEmitter.cpp", "X86DisassemblerTables.cpp", "X86FoldTablesEmitter.cpp", + "X86InstrMappingEmitter.cpp", "X86MnemonicTables.cpp", "X86ModRMFilters.cpp", "X86RecognizableInstr.cpp", diff --git a/llvm/utils/split-file/split-file.cpp b/llvm/utils/split-file/split-file.cpp index 2ad04d6e42f2b..672877adaba31 100644 --- a/llvm/utils/split-file/split-file.cpp +++ b/llvm/utils/split-file/split-file.cpp @@ -123,7 +123,7 @@ static int handle(MemoryBuffer &inputBuf, StringRef input) { if (ec) fatal(input, ec.message()); auto f = std::make_unique(partPath.str(), ec, - llvm::sys::fs::OF_None); + llvm::sys::fs::OF_Text); if (!f) fatal(input, ec.message()); @@ -156,7 +156,7 @@ int main(int argc, const char **argv) { if (output.empty()) fatal("", "output directory is not specified"); ErrorOr> bufferOrErr = - MemoryBuffer::getFileOrSTDIN(input); + MemoryBuffer::getFileOrSTDIN(input, /*IsText=*/true); if (std::error_code ec = bufferOrErr.getError()) fatal(input, ec.message()); diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 2ff9d612a5efa..c50df6ccd9aa5 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -1407,6 +1407,10 @@ def MemRef_ReinterpretCastOp "OpFoldResult":$offset, "ArrayRef":$sizes, "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, + // Build a ReinterpretCastOp and infer the result type. + OpBuilder<(ins "Value":$source, "OpFoldResult":$offset, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, // Build a ReinterpretCastOp with static entries. 
OpBuilder<(ins "MemRefType":$resultType, "Value":$source, "int64_t":$offset, "ArrayRef":$sizes, diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index f3ca09a6a68ea..26eec0d4f2082 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -19,12 +19,18 @@ class XeGPUAttr traits = [], let mnemonic = attrMnemonic; } -def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { +class XeGPU_TensorDescAttr traits = [], + string baseCppClass = "::mlir::Attribute"> + : XeGPUAttr { + let assemblyFormat = "`<` struct(params) `>`"; +} + +def XeGPU_BlockTensorDescAttr: XeGPU_TensorDescAttr<"BlockTensorDesc", "block_tdesc_attr"> { let summary = [{a composite attribute for `TensorDescType`}]; - let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite + let description = [{`BlockTensorDesc` (or `block_tdesc_attr`) is a composite attribute defined for `TensorDescType` for describing following properties of a `TensorDesc`. - 1. `memory_scope`: It describes where the data block described by the + 1. `memory_space`: It describes where the data block described by the TensorDesc is located, `Global` device memory or `Shared` local memory. It is default to `Global`. 2. `array_length`: It describes how many horizontally consecutive blocks @@ -33,43 +39,63 @@ def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { 8x32. Its default value is 1. 3. `boundary_check`: It is used to indicates the hardware whether to do out-of-boundary check. The default value is true. - 4. `scattered`: It is used to differenciate TensorDescs created from - `create_nd_tdesc` vs from `create_tdesc`. }]; let parameters = (ins - OptionalParameter<"MemoryScopeAttr">: $memory_scope, + OptionalParameter<"MemorySpaceAttr">: $memory_space, OptionalParameter<"IntegerAttr", "1">: $array_length, - OptionalParameter<"BoolAttr", "true">: $boundary_check, - OptionalParameter<"BoolAttr", "false">: $scattered + OptionalParameter<"BoolAttr", "true">: $boundary_check ); let builders = [ AttrBuilder<(ins - CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope, + CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space, CArg<"int", "1">:$array_length, - CArg<"bool", "true">: $boundary_check, - CArg<"bool", "false">: $scattered + CArg<"bool", "true">: $boundary_check )> ]; - let assemblyFormat = "`<` struct(params) `>`"; } +def XeGPU_ScatterTensorDescAttr: XeGPU_TensorDescAttr<"ScatterTensorDesc", "scatter_tdesc_attr"> { + let summary = [{a composite attribute for `TensorDescType`}]; + let description = [{`ScatterTensorDesc` (or `scatter_tdesc_attr`) is a composite + attribute defined for `TensorDescType` for describing following + properties of a `TensorDesc`. + 1. `memory_space`: It describes where the data block described by the + TensorDesc is located, `Global` device memory or `Shared` local memory. + It is default to `Global`. + 2. `chunk_size`: indicates number of continious elements accessed for each + offset, default is 1. It is used with `scattered` attr only. 
+ }]; + + let parameters = (ins + OptionalParameter<"MemorySpaceAttr">: $memory_space, + OptionalParameter<"IntegerAttr", "1">: $chunk_size + ); + + let builders = [ + AttrBuilder<(ins + CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space, + CArg<"int", "1">: $chunk_size + )> + ]; + } + //===----------------------------------------------------------------------===// // XeGPU Memory Scope Enums. //===----------------------------------------------------------------------===// -def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">; -def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">; -def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", +def XeGPU_MemorySpaceGlobal: I32EnumAttrCase<"Global", 0, "global">; +def XeGPU_MemorySpaceShared: I32EnumAttrCase<"SLM", 3, "slm">; +def XeGPU_MemorySpace: I32EnumAttr<"MemorySpace", "The address space of the memory the tensor descritor is created for", - [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> { + [XeGPU_MemorySpaceGlobal, XeGPU_MemorySpaceShared]> { let genSpecializedAttr = 0; let cppNamespace = "::mlir::xegpu"; } -def XeGPU_MemoryScopeAttr: - EnumAttr { +def XeGPU_MemorySpaceAttr: + EnumAttr<XeGPU_Dialect, XeGPU_MemorySpace, "memory_space"> { let summary = [{Describe the location of data described by a `TensorDesc`: Global device memory (`Global`) or Shared local memory (`SLM`).}]; let assemblyFormat = "$value"; @@ -116,4 +142,4 @@ def XeGPU_FenceScopeAttr: let assemblyFormat = "$value"; } -#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD \ No newline at end of file +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index c32c7541c3979..e24a056de2caf 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -218,6 +218,23 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } mlir::Value getViewSource() { return getSource(); } + + unsigned getSourceMemorySpace() { + auto srcTy = getSourceType(); + if (auto memrefTy = llvm::dyn_cast<MemRefType>(srcTy)) { + auto attr = memrefTy.getMemorySpace(); + if (attr) { + if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attr)) { + return static_cast<unsigned>(intAttr.getInt()); + } + if (auto memSpaceAttr = llvm::dyn_cast<MemorySpaceAttr>(attr)) + return static_cast<unsigned>(memSpaceAttr.getValue()); + } + } + // take global as the default memory space. + return static_cast<unsigned>(MemorySpace::Global); + } + }]; } @@ -411,8 +428,10 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { is fixed to the hardware supportted subgroup size, e.g., 16 on PVC, implying each element in the array corresponds to a work-item (SIMT lane) in the subgroup. - * chunk_size: [optional attribute] indicates number of continious - elements accessed for each offset, default is 1. + + The first dimension of the result TensorDesc corresponds to work-items, so it should + match the dimension of offsets. It may also have a second dimension corresponding to + the chunk_size if the chunk size is larger than 1.
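With `chunk_size` now carried by `scatter_tdesc_attr`, the result shape of `create_tdesc` is fully determined by the number of offsets and the chunk size; a short sketch of that computation, mirroring the shape check in `CreateDescOp::verify` (names illustrative):

```c++
#include "llvm/ADT/SmallVector.h"
#include <cstdint>

// dim-0 has one slot per offset (one per work-item / SIMT lane);
// a chunk_size greater than 1 appends a trailing dimension.
llvm::SmallVector<int64_t> expectedTdescShape(int64_t numOffsets,
                                              int64_t chunkSize) {
  llvm::SmallVector<int64_t> shape{numOffsets};
  if (chunkSize != 1)
    shape.push_back(chunkSize);
  return shape;
}
```

For instance, four offsets with `chunk_size = 8` yield a `4x8` TensorDesc, matching the examples that follow.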
It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64] ```mlir @@ -424,29 +443,22 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { It will access totally 32 data elements: a[0:7], a[16:23], a[32:39], a[64:71] ```mlir %0 = memref.alloc() : memref<1024xf32> - %1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + %1 = xegpu.create_tdesc %0[0, 16, 32, 64] : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8> ``` Example 3. It is similar to Example 2, but there is some overlaps among workitems. It accesses: a[0:7], a[4:11], a[8:15], a[12:19] ```mlir %0 = memref.alloc() : memref<1024xf32> - %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + %1 = xegpu.create_tdesc %0[0, 4, 8, 12] : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8> ``` }]; let arguments = (ins XeGPU_BaseAddrType: $source, Variadic: $offsets, - DenseI64ArrayAttr: $const_offsets, - DefaultValuedAttr: $chunk_size); + DenseI64ArrayAttr: $const_offsets); let results = (outs XeGPU_TensorDesc:$TensorDesc); - let builders = [ - OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "Value": $source, - "llvm::ArrayRef": $offsets, - CArg<"uint32_t", "1"> : $chunk_size)>, - ]; - let assemblyFormat = [{ $source custom($offsets, $const_offsets) @@ -473,6 +485,22 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { assert(idx < getNumOffsets() && "Invalid out of bound access."); return getMixedOffsets()[idx]; } + + unsigned getSourceMemorySpace() { + auto srcTy = getSource().getType(); + if (auto memrefTy = llvm::dyn_cast<MemRefType>(srcTy)) { + auto attr = memrefTy.getMemorySpace(); + if (attr) { + if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attr)) + return static_cast<unsigned>(intAttr.getInt()); + if (auto memSpaceAttr = llvm::dyn_cast<MemorySpaceAttr>(attr)) + return static_cast<unsigned>(memSpaceAttr.getValue()); + } + } + // take global as the default memory space. + return static_cast<unsigned>(MemorySpace::Global); + } + }]; let hasVerifier = 1; @@ -520,28 +548,31 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"] let description = [{ It (aka. load) load data per each work-item. The output describes the data being loaded at the subgroup level, so its size is - consistent with the number of work-items in a subgroup. When `chunk_size_per_lane` - attribute is larger than 1 in TensorDesc, the output vector will be 2D vector, - with dim-1 correspoding to the chunk size. + consistent with the number of work-items in a subgroup. When the chunk size + is larger than 1, the output vector is a 2D vector, with dim-1 corresponding + to work-items, and dim-0 corresponding to the chunk_size loaded by each work-item. + In particular, there is a transpose effect on the result (as compared to the TensorDesc) + due to the hardware implementation. Therefore, a transpose attribute is introduced + on purpose, making sure users are aware of this implicit transformation. The mask operand masks out memory access so that it is safe to pass out-of-boundary addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
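Concretely, a rank-2 scattered TensorDesc is loaded with its two dimensions swapped; a sketch of the expected value shape, mirroring the verifier change later in this patch:

```c++
#include "llvm/ADT/SmallVector.h"
#include <cstdint>
#include <utility>

// A 4x2 TensorDesc (4 work-items, chunk_size 2) loads as a 2x4 vector:
// dim-1 indexes work-items, dim-0 the chunk elements, hence the mandatory
// `transpose` attribute on rank-2 gathers.
llvm::SmallVector<int64_t>
expectedLoadShape(llvm::SmallVector<int64_t> tdescShape) {
  if (tdescShape.size() == 2)
    std::swap(tdescShape[0], tdescShape[1]);
  return tdescShape;
}
```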
Example: ```mlir - %2 = xegpu.load %1, %0 {transpose = [1, 0], + %2 = xegpu.load %1, %0 {transpose, l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, l3_hint = #xegpu.cache_hint} - : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1> - -> vector<16xf32> + : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr>, + vector<16xi1> -> vector<16xf32> ``` }]; let arguments = (ins XeGPU_TensorDesc: $TensorDesc, XeGPU_MaskType: $mask, - OptionalAttr: $transpose, + OptionalAttr<UnitAttr>: $transpose, OptionalAttr: $l1_hint, OptionalAttr: $l2_hint, OptionalAttr: $l3_hint); @@ -573,11 +604,15 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"] let hasVerifier = 1; } -def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllShapesMatch<["value", "TensorDesc"]>, - AllElementTypesMatch<["value", "TensorDesc"]>]> { +def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllElementCountsMatch<["value", "TensorDesc"]>, + AllElementTypesMatch<["value", "TensorDesc"]>]> { let summary = "store data to scattered memory locations."; - let description = [{ It (aka. store) stores data to scattered memory locations. - It has similar semantic to `load_gather`. + let description = [{ It (aka. store) stores data to scattered memory locations. The value is + typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be + a 2D vector instead. For the latter case, dim-1 of the value corresponds to the SIMD lanes + and the dim-0 of the value corresponds to the chunk_size stored per lane. So `store_scatter` + has a transpose effect, which is similar to `load_gather`. Therefore, a transpose attribute is + introduced on purpose, making sure users are aware of this implicit transformation. Example: ```mlir @@ -592,6 +627,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllShapesMatch<["value", "TensorDe XeGPU_ValueType: $value, XeGPU_TensorDesc: $TensorDesc, XeGPU_MaskType: $mask, + OptionalAttr<UnitAttr>: $transpose, OptionalAttr: $l1_hint, OptionalAttr: $l2_hint, OptionalAttr: $l3_hint); @@ -723,7 +759,7 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>] def XeGPU_AtomicRMWOp: XeGPU_Op<"atomic_rmw", [Pure, AllElementTypesMatch<["tensorDesc", "value", "result"]>, - AllShapesMatch<["tensorDesc", "mask", "value", "result"]>]> { + AllShapesMatch<["tensorDesc", "value", "result"]>]> { let summary = "Atomic ready-modify-write operation on the TensorDesc. "; let description = [{ @@ -808,7 +844,7 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> { 2. `Fence_scope` describes the scope of fence. "Workgroup" means that the scope would be within each workgroup. "GPU" means the scope would be across workgroups within the GPU.
}]; - let arguments = (ins XeGPU_MemoryScopeAttr: $memory_kind, + let arguments = (ins XeGPU_MemorySpaceAttr: $memory_kind, XeGPU_FenceScopeAttr: $fence_scope); let assemblyFormat = [{`memory_kind` `=` `` $memory_kind `,` `fence_scope` `=` `` $fence_scope attr-dict}]; let extraClassDeclaration = extraBaseClassDeclaration; diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 9f101a71697b5..0ce1211664b5b 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -48,7 +48,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", Similar to the builtin tensor, it also provides an optinal attribute to encoding the following information via the TensorDescAttr object: - * memory_scope (xegpu::MemoryScope): [optional] where the data is located, + * memory_space (xegpu::MemorySpace): [optional] where the data is located, global memory or shared memory. It is default to Global. * array_length (int): [optional] The number of contiguous blocks with size as `shape`, that will be loaded by block load at a time. It is default to 1. @@ -63,7 +63,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", element-type ::= float-type | integer-type | index-type dim-list := (static-dim-list `x`)? static-dim-list ::= decimal-literal `x` decimal-literal - attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? + attr-list = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? ``` Examples: @@ -76,7 +76,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", xegpu.tensor_desc<8x16xf32> // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space. 
- xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr> + xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr> ``` }]; @@ -88,11 +88,14 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", TypeBuilderWithInferredContext<(ins "llvm::ArrayRef": $shape, "mlir::Type": $elementType, - CArg<"bool", "false">: $scattered, CArg<"int", "1">: $array_length, - CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope, - CArg<"bool", "true">: $boundary_check - )> + CArg<"bool", "true">: $boundary_check, + CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space)>, + TypeBuilderWithInferredContext<(ins + "llvm::ArrayRef": $shape, + "mlir::Type": $elementType, + CArg<"int", "1">: $chunk_size, + CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space)> ]; let extraClassDeclaration = [{ @@ -110,40 +113,58 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", return llvm::cast(cloneWith(getShape(), elementType)); } - TensorDescAttr getEncodingAsTensorDescAttr() const { - return llvm::dyn_cast_if_present(getEncoding()); + BlockTensorDescAttr getEncodingAsBlockTensorDescAttr() const { + return llvm::dyn_cast_if_present(getEncoding()); } - xegpu::MemoryScope getMemoryScope() const { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getMemoryScope()) - return attr.getMemoryScope().getValue(); + ScatterTensorDescAttr getEncodingAsScatterTensorDescAttr() const { + return llvm::dyn_cast_if_present(getEncoding()); + } + + xegpu::MemorySpace getMemorySpace() const { + auto block_attr = getEncodingAsBlockTensorDescAttr(); + if (block_attr && block_attr.getMemorySpace()) + return block_attr.getMemorySpace().getValue(); + + auto scatter_attr = getEncodingAsScatterTensorDescAttr(); + if (scatter_attr && scatter_attr.getMemorySpace()) + return scatter_attr.getMemorySpace().getValue(); + // return default value - return MemoryScope::Global; + return MemorySpace::Global; } int getArrayLength() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getArrayLength()) - return attr.getArrayLength().getInt(); + auto attr = getEncoding(); + auto block_attr = mlir::dyn_cast_if_present(attr); + assert((!attr || block_attr) && "invalid on non BlockTensorDescAttr."); + if (block_attr && block_attr.getArrayLength()) + return block_attr.getArrayLength().getInt(); // return default value return 1; } bool getBoundaryCheck() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getBoundaryCheck()) - return attr.getBoundaryCheck().getValue(); + auto attr = getEncoding(); + auto block_attr = mlir::dyn_cast_if_present(attr); + assert((!attr || block_attr) && "invalid on non BlockTensorDescAttr."); + if (block_attr && block_attr.getBoundaryCheck()) + return block_attr.getBoundaryCheck().getValue(); // return default value return true; } - bool getScattered() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getScattered()) - return attr.getScattered().getValue(); - // return default value - return false; + bool isScattered() { + return bool(getEncodingAsScatterTensorDescAttr()); + } + + int getChunkSize() { + auto attr = getEncoding(); + auto scatter_attr = mlir::dyn_cast_if_present(attr); + assert((!attr || scatter_attr) && "invalid on non ScatterTensorDescAttr."); + if (scatter_attr && scatter_attr.getChunkSize()) + return scatter_attr.getChunkSize().getInt(); + return 1; } }]; diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt index aef3cf467fb65..35576732c82cf 100644 --- 
a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -35,12 +35,9 @@ add_mlir_conversion_library(MLIRVectorToLLVMPass MLIRVectorToLLVM MLIRArmNeonDialect - MLIRArmNeonTransforms MLIRArmSMEDialect - MLIRArmSMETransforms MLIRArmSVEDialect MLIRArmSVETransforms - MLIRVectorToArmSME MLIRAMXDialect MLIRAMXTransforms MLIRX86VectorDialect diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp index be1581d619a8b..fa03442765539 100644 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp @@ -168,9 +168,8 @@ struct TransferReadLowering : public OpRewritePattern { if (isTransposeLoad) std::reverse(descShape.begin(), descShape.end()); auto descType = xegpu::TensorDescType::get( - descShape, elementType, /*scattered=*/false, /*array_length=*/1, - xegpu::MemoryScope::Global, - /*boundary_check=*/isOutOfBounds); + descShape, elementType, /*array_length=*/1, + /*boundary_check=*/isOutOfBounds, xegpu::MemorySpace::Global); xegpu::CreateNdDescOp ndDesc = createNdDescriptor(rewriter, loc, descType, @@ -212,10 +211,10 @@ struct TransferWriteLowering return rewriter.notifyMatchFailure(writeOp, "Expects identity map"); VectorType vecTy = writeOp.getVectorType(); - auto descType = xegpu::TensorDescType::get( - vecTy.getShape(), vecTy.getElementType(), - /*scattered=*/false, /*array_length=*/1, xegpu::MemoryScope::Global, - /*boundary_check=*/false); + auto descType = + xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(), + /*array_length=*/1, /*boundary_check=*/false, + xegpu::MemorySpace::Global); xegpu::CreateNdDescOp ndDesc = createNdDescriptor( rewriter, loc, descType, dyn_cast>(writeOp.getSource()), diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp index b197786c32054..51dfd84d9ac60 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp @@ -197,8 +197,10 @@ LogicalResult DeallocationState::getMemrefsAndConditionsToDeallocate( // that we can call extract_strided_metadata on it. if (auto unrankedMemRefTy = dyn_cast(memref.getType())) memref = builder.create( - loc, MemRefType::get({}, unrankedMemRefTy.getElementType()), memref, - 0, SmallVector{}, SmallVector{}); + loc, memref, + /*offset=*/builder.getIndexAttr(0), + /*sizes=*/ArrayRef<OpFoldResult>{}, + /*strides=*/ArrayRef<OpFoldResult>{}); // Use the `memref.extract_strided_metadata` operation to get the base // memref. This is needed because the same MemRef that was produced by the diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index c332307da4d33..fa20001f66182 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -2987,10 +2987,15 @@ struct Conv1DGenerator if (!setOperKind(reduceOp)) return; auto maybeKind = getCombinerOpKind(reduceOp); - if (!maybeKind || (*maybeKind != vector::CombiningKind::ADD && + // Typically convolution will have an `Add` CombiningKind, but for i1 type it + // can get strength-reduced to `OR`, which is also supported. This strength + // reduction logic is in `buildBinaryFn` helper in the Linalg dialect.
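Accepting `OR` here (and, below, `AndOp` in place of `MulOp`) is sound because on `i1` a saturating add coincides with bitwise OR and multiplication with bitwise AND; a standalone check of that identity:

```c++
#include <cassert>

int main() {
  for (int a : {0, 1})
    for (int b : {0, 1}) {
      assert(((a + b) != 0) == ((a | b) != 0)); // add saturated to 1 == OR
      assert((a * b) == (a & b));               // mul == AND
    }
  return 0;
}
```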
+ if (!maybeKind || ((*maybeKind != vector::CombiningKind::ADD && + *maybeKind != vector::CombiningKind::OR) && (oper != Pool || !isSupportedPoolKind(*maybeKind)))) { return; } + reductionKind = maybeKind.value(); auto rhsRank = rhsShapedType.getRank(); switch (oper) { @@ -3273,10 +3278,12 @@ struct Conv1DGenerator bindDims(ctx, n, w, f, c); lhs = promote(rewriter, loc, lhs, res.getType()); rhs = promote(rewriter, loc, rhs, res.getType()); - return rewriter.create( + auto contractionOp = rewriter.create<vector::ContractionOp>( loc, lhs, rhs, res, /*indexingMaps=*/MapList{{n, w, c}, {c, f}, {n, w, f}}, /*iteratorTypes=*/ArrayRef{par, par, par, red}); + contractionOp.setKind(reductionKind); + return contractionOp; } // Create an outerproduct: lhs{w} * rhs{1} -> res{w} for single channel @@ -3666,6 +3673,7 @@ struct Conv1DGenerator int strideW, dilationW; Value lhsShaped, rhsShaped, resShaped; ShapedType lhsShapedType, rhsShapedType, resShapedType; + vector::CombiningKind reductionKind; // Sets oper, poolExtOp and isPoolExt for valid conv/pooling ops. // Returns true iff it is a valid conv/pooling op. @@ -3681,7 +3689,9 @@ struct Conv1DGenerator switch (numBlockArguments) { case 1: { // Will be convolution if feeder is a MulOp. - // Otherwise, if it can be pooling. + // A strength-reduced version of MulOp for i1 type is AndOp, which is also + // supported. Otherwise, it can be pooling. This strength reduction logic + // is in `buildBinaryFn` helper in the Linalg dialect. auto feedValIt = llvm::find_if_not(reduceOp->getOperands(), llvm::IsaPred); Operation *feedOp = (*feedValIt).getDefiningOp(); if (isCastOfBlockArgument(feedOp)) { oper = Pool; isPoolExt = true; poolExtOp = feedOp->getName().getIdentifier(); - } else if (!(isa(feedOp) && + } else if (!((isa<arith::MulIOp, arith::MulFOp>(feedOp) || + (isa<arith::AndIOp>(feedOp) && + feedOp->getResultTypes()[0].isInteger(1))) && llvm::all_of(feedOp->getOperands(), [](Value v) { if (isa(v)) return true; diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 9c021d3613f1c..75b9729e63648 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1832,6 +1832,24 @@ void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, b.getDenseI64ArrayAttr(staticStrides)); } +void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, + Value source, OpFoldResult offset, + ArrayRef<OpFoldResult> sizes, + ArrayRef<OpFoldResult> strides, + ArrayRef<NamedAttribute> attrs) { + auto sourceType = cast<MemRefType>(source.getType()); + SmallVector<int64_t> staticOffsets, staticSizes, staticStrides; + SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides); + auto stridedLayout = StridedLayoutAttr::get( + b.getContext(), staticOffsets.front(), staticStrides); + auto resultType = MemRefType::get(staticSizes, sourceType.getElementType(), + stridedLayout, sourceType.getMemorySpace()); + build(b, result, resultType, source, offset, sizes, strides, attrs); +} + void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, MemRefType resultType, Value source, int64_t offset, ArrayRef sizes, diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 24719fe748fe4..1dfbaed454c19 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -30,23 +30,35 @@ void XeGPUDialect::initialize() { }
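The new `ReinterpretCastOp` builder above infers the result `MemRefType` from the source element type and memory space plus the given offset, sizes, and strides, so call sites no longer spell the type out (the `BufferDeallocationOpInterface.cpp` hunk earlier in this patch is one such site). A hypothetical call site, with `builder`, `loc`, and `source` assumed from context:

```c++
// Reinterpret `source` as a 4x8 memref with row-major strides; the result
// type, including its strided layout, is inferred by the builder. Dynamic
// offset/size/stride values would show up as dynamic in the inferred type.
SmallVector<OpFoldResult> sizes = {builder.getIndexAttr(4),
                                   builder.getIndexAttr(8)};
SmallVector<OpFoldResult> strides = {builder.getIndexAttr(8),
                                     builder.getIndexAttr(1)};
Value cast = builder.create<memref::ReinterpretCastOp>(
    loc, source, /*offset=*/builder.getIndexAttr(0), sizes, strides);
```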
//===----------------------------------------------------------------------===// -// XeGPU_TensorDescAttr +// XeGPU_BlockTensorDescAttr //===----------------------------------------------------------------------===// -TensorDescAttr TensorDescAttr::get(mlir::MLIRContext *context, - xegpu::MemoryScope memory_scope, - int array_length, bool boundary_check, - bool scattered) { - auto scopeAttr = MemoryScopeAttr::get(context, memory_scope); +BlockTensorDescAttr BlockTensorDescAttr::get(mlir::MLIRContext *context, + xegpu::MemorySpace memory_space, + int array_length, + bool boundary_check) { + auto scopeAttr = MemorySpaceAttr::get(context, memory_space); auto lengthAttr = IntegerAttr::get(IntegerType::get(context, 64), array_length); auto boundaryAttr = BoolAttr::get(context, boundary_check); - auto scatteredAttr = BoolAttr::get(context, scattered); - return Base::get(context, scopeAttr, lengthAttr, boundaryAttr, scatteredAttr); + return Base::get(context, scopeAttr, lengthAttr, boundaryAttr); +} + +//===----------------------------------------------------------------------===// +// XeGPU_ScatterTensorDescAttr +//===----------------------------------------------------------------------===// +ScatterTensorDescAttr +ScatterTensorDescAttr::get(mlir::MLIRContext *context, + xegpu::MemorySpace memory_space, int chunk_size) { + auto scopeAttr = MemorySpaceAttr::get(context, memory_space); + auto chunkSizeAttr = + IntegerAttr::get(IntegerType::get(context, 64), chunk_size); + return Base::get(context, scopeAttr, chunkSizeAttr); } //===----------------------------------------------------------------------===// // XeGPU_TensorDescType //===----------------------------------------------------------------------===// + mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { llvm::SmallVector shape; mlir::Type elementType; @@ -108,12 +120,20 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const { } TensorDescType TensorDescType::get(llvm::ArrayRef shape, - mlir::Type elementType, bool scattered, - int array_length, MemoryScope memory_scope, - bool boundary_check) { + mlir::Type elementType, int array_length, + bool boundary_check, + MemorySpace memory_space) { + auto context = elementType.getContext(); + auto attr = BlockTensorDescAttr::get(context, memory_space, array_length, + boundary_check); + return Base::get(context, shape, elementType, attr); +} + +TensorDescType TensorDescType::get(llvm::ArrayRef shape, + mlir::Type elementType, int chunk_size, + MemorySpace memory_space) { auto context = elementType.getContext(); - auto attr = TensorDescAttr::get(context, memory_scope, array_length, - boundary_check, scattered); + auto attr = ScatterTensorDescAttr::get(context, memory_space, chunk_size); return Base::get(context, shape, elementType, attr); } diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 9c517337a3aa5..1a7a6b3478409 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -124,6 +124,17 @@ LogicalResult CreateNdDescOp::verify() { bool invalidRank = false; bool invalidElemTy = false; + // Memory space of created TensorDesc should match with the source. + // Both source and TensorDesc are considered for global memory by default, + // if the memory scope attr is not specified. If source is an integer, + // it is considered as ptr to global memory. 
+ auto srcMemorySpace = getSourceMemorySpace(); + auto tdescMemorySpace = static_cast(getType().getMemorySpace()); + if (srcMemorySpace != tdescMemorySpace) + return emitOpError("Memory space mismatch.") + << " Source: " << srcMemorySpace + << ", TensorDesc: " << tdescMemorySpace; + // check source type matches the rank if it is a memref. // It also should have the same ElementType as TensorDesc. auto memrefTy = dyn_cast(getSourceType()); @@ -152,9 +163,13 @@ LogicalResult CreateNdDescOp::verify() { return emitOpError("TensorDesc should have the same element " "type with the source if it is a memref.\n"); - if (getType().getScattered()) + if (getType().isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); + if (getType().getRank() == 2 && + tdescMemorySpace == static_cast(MemorySpace::SLM)) + return emitOpError("SLM is not supported for 2D Block TensorDesc.\n"); + return success(); } @@ -163,7 +178,7 @@ LogicalResult CreateNdDescOp::verify() { //===----------------------------------------------------------------------===// LogicalResult PrefetchNdOp::verify() { auto tdescTy = getTensorDescType(); - if (tdescTy.getScattered()) + if (tdescTy.isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!isReadHintOrNone(getL1HintAttr())) @@ -188,7 +203,7 @@ LogicalResult LoadNdOp::verify() { if (tdescTy.getRank() > 2) return emitOpError("Expecting a 1D/2D TensorDesc.\n"); - if (tdescTy.getScattered()) + if (tdescTy.isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!valueTy) @@ -228,8 +243,8 @@ LogicalResult LoadNdOp::verify() { tdescShape[axis] /= vnni_factor; tdescShape.push_back(vnni_factor); } else { - return emitWarning("Invalid Packed Attr. It is ignored (available for 2D " - "TensorDesc only)."); + emitWarning("Invalid Packed Attr. 
It is ignored (available for 2D " + "TensorDesc only)."); } } @@ -256,7 +271,7 @@ LogicalResult StoreNdOp::verify() { if (dstTy.getRank() > 2) return emitOpError("Expecting a 1D/2D TensorDesc.\n"); - if (dstTy.getScattered()) + if (dstTy.isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!valTy) @@ -279,7 +294,7 @@ LogicalResult StoreNdOp::verify() { //===----------------------------------------------------------------------===// LogicalResult UpdateNdOffsetOp::verify() { auto ty = getTensorDescType(); - if (ty.getScattered()) + if (ty.isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); // number of offsets specified must match the rank of the tensor descriptor @@ -292,28 +307,55 @@ LogicalResult UpdateNdOffsetOp::verify() { //===----------------------------------------------------------------------===// // XeGPU_CreateDescOp //===----------------------------------------------------------------------===// -void CreateDescOp::build(OpBuilder &builder, OperationState &state, - TensorDescType TensorDesc, Value source, - llvm::ArrayRef offsets, - uint32_t chunk_size) { - llvm::SmallVector staticOffsets; - llvm::SmallVector dynamicOffsets; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); - build(builder, state, TensorDesc, source, dynamicOffsets, staticOffsets, - chunk_size); -} LogicalResult CreateDescOp::verify() { auto tdescTy = getTensorDescType(); - auto chunkSize = getChunkSize(); if (getRankOf(getSource()) > 1) return emitOpError( "Expecting the source is a 1D memref or pointer (uint64_t)."); - if (!tdescTy.getScattered()) + if (!tdescTy.isScattered()) return emitOpError("Expects a scattered TensorDesc.\n"); + // Memory space of the created TensorDesc should match that of the source. + // Both source and TensorDesc are considered to be in global memory by default + // if the memory space attr is not specified. If source is an integer, + // it is treated as a pointer to global memory. + auto srcMemorySpace = getSourceMemorySpace(); + auto tdescMemorySpace = static_cast<unsigned>(tdescTy.getMemorySpace()); + if (srcMemorySpace != tdescMemorySpace) + return emitOpError("Memory space mismatch.") + << " Source: " << srcMemorySpace + << ", TensorDesc: " << tdescMemorySpace; + + auto chunkSize = tdescTy.getChunkSize(); + + // check chunk_size + llvm::SmallVector<int64_t> supportedChunkSizes = {1, 2, 3, 4, 8, + 16, 32, 64, 128, 256}; + if (!llvm::is_contained(supportedChunkSizes, chunkSize)) + return emitOpError("Invalid chunk_size. Supported values are 1, 2, 3, 4, " + "8, 16, 32, 64, 128, or 256."); + + // check total size + auto elemBits = tdescTy.getElementType().getIntOrFloatBitWidth(); + auto bitsPerLane = elemBits * chunkSize; + if (chunkSize > 1 && bitsPerLane % 32) { + // For 8-bit and 16-bit data, the hardware only supports chunk size of 1. + // For 32-bit data, the hardware can support a larger chunk size. So + // we can bitcast 8-bit/16-bit data to 32-bit data for better performance. + // But this requires the total size to be 32-bit aligned to make the + // optimization work. + return emitOpError( + "access size (chunk_size * sizeof(elemTy)) should be 32-bit aligned."); + } + + auto lscConstraints = 512 * 8; // each access is up to 512 bytes.
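The 32-bit alignment rule above reduces to simple arithmetic on the element width and chunk size; a standalone sketch with a few worked cases:

```c++
#include <cassert>

// Chunk accesses must be 32-bit aligned when chunk_size > 1, e.g.
//   f32 x 8 = 256 bits -> ok;  f16 x 8 = 128 bits -> ok;
//   f16 x 3 =  48 bits -> rejected.
bool isChunkSizeAligned(unsigned elemBits, unsigned chunkSize) {
  return chunkSize == 1 || (elemBits * chunkSize) % 32 == 0;
}

int main() {
  assert(isChunkSizeAligned(32, 8));
  assert(isChunkSizeAligned(16, 8));
  assert(!isChunkSizeAligned(16, 3));
  return 0;
}
```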
+ if (elemBits * tdescTy.getNumElements() > lscConstraints) + return emitOpError("total access size (simd_lanes * chunk_size * " + "sizeof(elemTy)) is up to 512 bytes."); + SmallVector shape({(int64_t)getNumOffsets()}); if (chunkSize != 1) shape.push_back(chunkSize); @@ -331,7 +373,7 @@ LogicalResult CreateDescOp::verify() { //===----------------------------------------------------------------------===// LogicalResult PrefetchOp::verify() { auto tdescTy = getTensorDescType(); - if (!tdescTy.getScattered()) + if (!tdescTy.isScattered()) return emitOpError("Expects a scattered TensorDesc.\n"); if (!isReadHintOrNone(getL1HintAttr())) @@ -354,7 +396,7 @@ LogicalResult LoadGatherOp::verify() { auto maskTy = getMaskType(); auto valueTy = getValueType(); - if (!tdescTy.getScattered()) + if (!tdescTy.isScattered()) return emitOpError("Expects a scattered TensorDesc.\n"); if (!isReadHintOrNone(getL1HintAttr())) @@ -379,12 +421,10 @@ LogicalResult LoadGatherOp::verify() { if (tdescShape[0] != maskShape[0]) return emitOpError("dim-0 of the Mask and TensorDesc should be the same."); - if (getTransposeAttr()) { - auto trans = getTranspose().value(); - if (tdescShape.size() < trans.size()) - emitWarning("Invalid transpose attr. It is ignored."); - else - transpose(trans, tdescShape); + if (tdescTy.getRank() == 2) { + if (!getTransposeAttr()) + return emitOpError("load_gather has to be transposed."); + transpose({1, 0}, tdescShape); } if (valueShape != tdescShape) @@ -400,7 +440,7 @@ LogicalResult StoreScatterOp::verify() { auto tdescTy = getTensorDescType(); - if (!tdescTy.getScattered()) + if (!tdescTy.isScattered()) return emitOpError("Expects a scattered TensorDesc.\n"); if (!isWriteHintOrNone(getL1HintAttr())) @@ -413,11 +453,24 @@ LogicalResult StoreScatterOp::verify() { return emitOpError("invlid l3_hint: ") << getL3HintAttr(); auto maskTy = getMaskType(); + auto valueTy = getValueType(); auto maskShape = getShapeOf(maskTy); auto tdescShape = getShapeOf(tdescTy); + auto valueShape = getShapeOf(valueTy); if (tdescShape[0] != maskShape[0]) return emitOpError("dim-0 of the Mask and TensorDesc should be the same."); + if (tdescTy.getRank() == 2) { + if (!getTransposeAttr()) + return emitOpError("store_scatter has to be transposed."); + transpose({1, 0}, tdescShape); + } + + if (valueShape != tdescShape) + return emitOpError("Unexpected value shape") + << "(Expected shape: " << makeString(tdescShape) + << ", Given shape: " << makeString(valueShape) << ").\n"; + return success(); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir index 93e36a69567bd..7f4b9b986c81b 100644 --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -39,6 +39,7 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // CHECK: %[[CONTRACT_0:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -46,6 +47,7 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>,
%filter: memref<1x // CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -61,6 +63,36 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // ----- +// This test is same as above but for i1 type with the only difference being that +// the combining kind for `vector.contract` is `OR`. +func.func @conv1d_nwc_4x2x8_memref_i1(%input: memref<4x6x3xi1>, %filter: memref<1x3x8xi1>, %output: memref<4x2x8xi1>) { + linalg.conv_1d_nwc_wcf + {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} + ins(%input, %filter : memref<4x6x3xi1>, memref<1x3x8xi1>) + outs(%output : memref<4x2x8xi1>) + return +} +// CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK: #[[OUTPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK: func @conv1d_nwc_4x2x8_memref_i1 +/// w == 0, kw == 0 +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +/// w == 1, kw == 0 +// CHECK: %[[CONTRACT_1:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +// ----- + // The i8i8i32 case is similar to f32 case, so checking one case is enough for // test coverage. func.func @conv1d_nwc_4x2x8_i8i8i32_memref(%input: memref<4x6x3xi8>, %filter: memref<1x3x8xi8>, %output: memref<4x2x8xi32>) { @@ -299,6 +331,7 @@ func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x // CHECK: %[[CONTRACT_0:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -306,6 +339,7 @@ func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x // CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -324,6 +358,37 @@ func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x // ----- +// This test is same as above but for i1 type with the only difference being that +// the combining kind for `vector.contract` is `OR`. 
+func.func @conv1d_ncw_4x8x2_memref_i1(%input: memref<4x3x6xi1>, %filter: memref<8x3x1xi1>, %output: memref<4x8x2xi1>) { + linalg.conv_1d_ncw_fcw + {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} + ins(%input, %filter : memref<4x3x6xi1>, memref<8x3x1xi1>) + outs(%output : memref<4x8x2xi1>) + return +} + +// CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK: #[[OUTPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK: func @conv1d_ncw_4x8x2_memref_i1 +/// w == 0, kw == 0 +// CHECK: vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +/// w == 1, kw == 0 +// CHECK: vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +// ----- + func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x3x2xf32>, %output: memref<4x8x2xf32>) { linalg.conv_1d_ncw_fcw {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} diff --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir index 35d44cf56a239..c1126efb6046d 100644 --- a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir +++ b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir @@ -24,8 +24,8 @@ gpu.func @test_create_nd_tdesc_vc_2(%src: ui64, %w : index, %h : index, %x : ind // CHECK: gpu.func @test_create_nd_tdesc_vc_3(%[[arg0:.*]]: memref<24x32xf32>) { gpu.func @test_create_nd_tdesc_vc_3(%src: memref<24x32xf32>) { - // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr - %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr> + // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.block_tdesc_attr + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.block_tdesc_attr> gpu.return } @@ -36,6 +36,13 @@ gpu.func @test_create_nd_tdesc_vc_4(%src: memref<2x24x32xf32>) { gpu.return } +// CHECK: gpu.func @test_create_nd_tdesc_vc_5(%[[arg0:.*]]: memref<2x24x32xf32, 3>) { +gpu.func @test_create_nd_tdesc_vc_5(%src: memref<2x24x32xf32, 3>) { + // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32, #xegpu.block_tdesc_attr> + %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32, #xegpu.block_tdesc_attr> + gpu.return +} + // CHECK: gpu.func @test_prefetch_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) { gpu.func @test_prefetch_nd_vc(%src: memref<24x32xf16>) { // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> @@ -97,17 +104,24 @@ gpu.func @test_create_update_nd_tdesc_vc(%src: memref<24x32xf32>) { // CHECK: gpu.func @test_create_tdesc_vc(%[[arg0:.*]]: ui64) { gpu.func @test_create_tdesc_vc(%src: ui64) { - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> 
@@ -97,17 +104,24 @@ gpu.func @test_create_update_nd_tdesc_vc(%src: memref<24x32xf32>) {
 
 // CHECK: gpu.func @test_create_tdesc_vc(%[[arg0:.*]]: ui64) {
 gpu.func @test_create_tdesc_vc(%src: ui64) {
-  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_tdesc_vc_1(%[[arg0:.*]]: memref) {
+gpu.func @test_create_tdesc_vc_1(%src: memref) {
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : memref -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : memref -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   gpu.return
 }
 
 // CHECK: gpu.func @test_prefetch_vc(%[[arg0:.*]]: ui64) {
 gpu.func @test_prefetch_vc(%src: ui64) {
-  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  // CHECK: xegpu.prefetch %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  // CHECK: xegpu.prefetch %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   gpu.return
 }
 
@@ -115,12 +129,12 @@ gpu.func @test_prefetch_vc(%src: ui64) {
 gpu.func @test_load_gather_vc(%src: ui64) {
   //CHECK: %[[cst:.*]] = arith.constant dense : vector<4xi1>
   %0 = arith.constant dense<1>: vector<4xi1>
-  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>
-  //CHECK-SAME: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> -> vector<4x2xf32>
-  %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>
-        : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> -> vector<4x2xf32>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}>
+  //CHECK-SAME: !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> -> vector<2x4xf32>
+  %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}>
+        : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> -> vector<2x4xf32>
   gpu.return
 }
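// NOTE (editorial sketch): with the new `transpose` unit attribute on
// scattered loads and stores, a 4x2 descriptor (4 per-lane offsets, chunk of
// 2 elements each) now transfers a vector<2x4xf32>, i.e. the chunk dimension
// becomes the leading dimension of the in-register value; the earlier form
// of this test used matching 4x2 shapes on both sides. Sketch, cache hints
// omitted and attribute parameters elided:
//
//   %v = xegpu.load %td, %mask <{transpose}>
//      : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> -> vector<2x4xf32>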
 
@@ -128,23 +142,23 @@ gpu.func @test_load_gather_vc(%src: ui64) {
 gpu.func @test_store_scatter_vc(%src: ui64) {
   //CHECK: %[[c0:.*]] = arith.constant dense : vector<4xi1>
   %0 = arith.constant dense<1>: vector<4xi1>
-  //CHECK: %[[c1:.*]] = arith.constant dense<2.900000e+00> : vector<4x2xf32>
-  %1 = arith.constant dense<2.9>: vector<4x2xf32>
-  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  //CHECK: xegpu.store %[[c1]], %[[R0]], %[[c0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>
-  //CHECK-SAME: vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1>
-  xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>
-        : vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1>
+  //CHECK: %[[c1:.*]] = arith.constant dense<2.900000e+00> : vector<2x4xf32>
+  %1 = arith.constant dense<2.9>: vector<2x4xf32>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %2 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  //CHECK: xegpu.store %[[c1]], %[[R0]], %[[c0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}>
+  //CHECK-SAME: vector<2x4xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1>
+  xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}>
+        : vector<2x4xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1>
   gpu.return
 }
 
 // CHECK: gpu.func @test_create_update_tdesc_vc(%[[arg0:.*]]: ui64) {
 gpu.func @test_create_update_tdesc_vc(%src: ui64) {
-  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  //CHECK: %[[R1:.*]] = xegpu.update_offset %[[R0]], [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
-  %2 = xegpu.update_offset %1, [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24]: ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  //CHECK: %[[R1:.*]] = xegpu.update_offset %[[R0]], [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  %2 = xegpu.update_offset %1, [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   gpu.return
 }
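// NOTE (editorial sketch, parameter defaults assumed): an empty parameter
// list, #xegpu.scatter_tdesc_attr<>, takes the attribute's defaults;
// assuming the default chunk size of one element per offset, a descriptor
// built from N offsets is 1-D of size N, as in the test_atomic_rmw case
// below:
//
//   %td = xegpu.create_tdesc %base [0, 1, 2, 3]
//       : ui64 -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>>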
 
@@ -165,10 +179,10 @@ gpu.func @test_dpas_vc_with_packed_b(%a : vector<8x16xf16>, %b: vector<8x16x2xf1
 
 // CHECK: gpu.func @test_atomic_rmw(%[[arg0:.*]]: ui64, %[[arg1:.*]]: vector<16xf32>, %[[arg2:.*]]: vector<16xi1>)
 gpu.func @test_atomic_rmw(%src: ui64, %value : vector<16xf32>, %mask : vector<16xi1>) {
-  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] : ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>
-  %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]: ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>
-  //CHECK: %[[R1:.*]] = xegpu.atomic_rmw addf %[[R0]], %[[arg2]], %[[arg1]] : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1>, vector<16xf32> -> vector<16xf32>
-  xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1>, vector<16xf32> -> vector<16xf32>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] : ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
+  %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]: ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
+  //CHECK: %[[R1:.*]] = xegpu.atomic_rmw addf %[[R0]], %[[arg2]], %[[arg1]] : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>, vector<16xf32> -> vector<16xf32>
+  xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>, vector<16xf32> -> vector<16xf32>
   gpu.return
 }
 
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 7ef50bb2b5fad..193dae352e370 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -15,6 +15,20 @@ func.func @test_create_nd_tdesc_vc_2(%src: memref<24x32xf32>) {
   return
 }
 
+// -----
+func.func @test_create_nd_tdesc_vc_3(%src: memref<2x24x32xf32, 3>) {
+  // expected-error@+1 {{SLM is not supported for 2D Block TensorDesc}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr>
+  return
+}
+
+// -----
+func.func @test_create_nd_tdesc_vc_4(%src: memref<2x24x32xf32, 3>) {
+  // expected-error@+1 {{Memory space mismatch}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32>
+  return
+}
+
 // -----
 func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) {
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -26,10 +40,10 @@ func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) {
 // -----
 func.func @test_prefetch_nd_vc_2(%src: memref<24xf16>) {
   %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7]
-        : memref<24xf16> -> !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr>
+        : memref<24xf16> -> !xegpu.tensor_desc<8xf16, #xegpu.scatter_tdesc_attr<>>
   // expected-error@+1 {{Expects a non-scattered TensorDesc}}
   xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint}>
-        : !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr>
+        : !xegpu.tensor_desc<8xf16, #xegpu.scatter_tdesc_attr<>>
   return
 }
 
@@ -44,11 +58,11 @@ func.func @test_load_nd_vc_1(%src: memref<8x16xf16>) {
 
 // -----
 func.func @test_load_nd_vc_2(%src: memref<16xf16>) {
-  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
-        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14]
+        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr>
   // expected-error@+1 {{Expects a non-scattered TensorDesc.}}
   %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint}>
-        : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> -> vector<8x2xf16>
+        : !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> -> vector<8x2xf16>
   return
 }
 
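// NOTE (editorial sketch): the nd (block) ops and the scatter ops now verify
// the flavor of their descriptor operand: prefetch_nd, load_nd, store_nd and
// update_nd_offset reject a TensorDesc carrying #xegpu.scatter_tdesc_attr,
// while create_tdesc requires one. For example, the verifier rejects:
//
//   %sd = xegpu.create_tdesc %p[0, 2, 4, 6]
//       : ui64 -> !xegpu.tensor_desc<4xf16, #xegpu.scatter_tdesc_attr<>>
//   // error: Expects a non-scattered TensorDesc
//   %v = xegpu.load_nd %sd : !xegpu.tensor_desc<4xf16, #xegpu.scatter_tdesc_attr<>> -> vector<4xf16>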
@@ -73,28 +87,28 @@ func.func @test_store_nd_vc_1(%dst: memref<24x32xf16>) {
 // -----
 func.func @test_store_nd_vc_2(%dst: memref<16xf16>) {
   %1 = arith.constant dense<1.0>: vector<8x2xf16>
-  %2 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
-        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr>
+  %2 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14]
+        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr>
   // expected-error@+1 {{Expects a non-scattered TensorDesc}}
   xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint}>
-        : vector<8x2xf16>, !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr>
+        : vector<8x2xf16>, !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr>
   return
 }
 
 // -----
 func.func @test_update_nd_offset_1(%dst: memref<16xf16>) {
-  %1 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
-        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr>
+  %1 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14]
+        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr>
   // expected-error@+1 {{Expects a non-scattered TensorDesc}}
-  xegpu.update_nd_offset %1, [0, 2] : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr>
+  xegpu.update_nd_offset %1, [0, 2] : !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr>
   return
 }
 
 // -----
 func.func @test_create_tdesc_vc_1(%src: ui64) {
   // expected-error@+1 {{Expects a scattered TensorDesc}}
-  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
-        : ui64 -> !xegpu.tensor_desc<8x2xf16>
+  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14]
+        : ui64 -> !xegpu.tensor_desc<8xf16>
   return
 }
 
@@ -102,7 +116,14 @@ func.func @test_create_tdesc_vc_1(%src: ui64) {
 func.func @test_create_tdesc_vc_2(%src: ui64) {
   // expected-error@+1 {{Incorrect TensorDesc shape}}
   %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
-        : ui64 -> !xegpu.tensor_desc<8x4xf16, #xegpu.tdesc_attr>
+        : ui64 -> !xegpu.tensor_desc<8x4xf16, #xegpu.scatter_tdesc_attr<>>
   return
 }
 
+// -----
+func.func @test_create_tdesc_vc_1(%src: memref) {
+  // expected-error@+1 {{Memory space mismatch}}
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : memref -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
+  return
+}
+
 // -----
 func.func @test_prefetch_vc_1(%src: memref<24x32xf16>) {
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -116,9 +137,9 @@ func.func @test_prefetch_vc_1(%src: memref<24x32xf16>) {
 
 // -----
 func.func @test_prefetch_vc_2(%src: ui64) {
-  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}}
-  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   return
 }
 
@@ -135,11 +156,11 @@ func.func @test_load_gather_vc_1(%src: memref<24x32xf16>) {
 // -----
 func.func @test_load_gather_vc_2(%src: ui64) {
   %0 = arith.constant dense<1>: vector<4xi1>
-  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64
-        -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64
+        -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}}
   %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint}>
-        : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1>
+        : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1>
          -> vector<4x2xf32>
   return
 }
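// NOTE: an expected-error {{...}} pattern must occur verbatim in the
// diagnostic that the verifier emits, which is why the misspelled "invlid"
// is kept as-is in the cache-hint tests here; correcting it would first
// require changing the diagnostic text in the op verifier itself. Usage:
//
//   // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}}
//   xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>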
@@ -159,11 +180,11 @@ func.func @test_store_scatter_vc_1(%src: memref<24x32xf32>) {
 func.func @test_store_scatter_vc_2(%src: ui64) {
   %0 = arith.constant dense<1>: vector<4xi1>
   %1 = arith.constant dense<2.9>: vector<4x2xf32>
-  %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2}
-        : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>
+  %2 = xegpu.create_tdesc %src[0, 8, 16, 24]
+        : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>
   // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}}
   xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint}> : vector<4x2xf32>,
-        !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1>
+        !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1>
   return
 }
 
@@ -182,9 +203,9 @@ func.func @test_dpas_vc_2(%a : vector<8x8x2xf16>, %b: vector<8x16x2xf16>) {
 }
 
 // -----
-func.func @test_atomic_rmw(%src: ui64, %value : vector<16x8xf32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] {chunk_size = 8}: ui64 -> !xegpu.tensor_desc<16x8xf32, #xegpu.tdesc_attr>
-  // expected-error@+1 {{failed to verify that all of {tensorDesc, mask, value, result} have same shape}}
-  xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16x8xf32, #xegpu.tdesc_attr>, vector<16xi1>, vector<16x8xf32> -> vector<16x8xf32>
-  gpu.return
+func.func @test_atomic_rmw(%src: ui64, %value : vector<16x4xf32>, %mask : vector<16xi1>) {
+  %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] : ui64 -> !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr>
+  // expected-error@+1 {{failed to verify that all of {tensorDesc, value, result} have same shape}}
+  xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr>, vector<16xi1>, vector<16x4xf32> -> vector<16x8xf32>
+  return
 }
\ No newline at end of file
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index dbc8c045eea99..140d48c8f9684 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1825,26 +1825,11 @@ libc_math_function(
     ],
 )
 
-libc_math_function(
-    name = "ceil",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "ceil")
 
-libc_math_function(
-    name = "ceilf",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "ceilf")
 
-libc_math_function(
-    name = "ceill",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "ceill")
 
 libc_math_function(name = "ceilf128")
 
@@ -2126,19 +2111,9 @@ libc_math_function(
     ],
 )
 
-libc_math_function(
-    name = "floor",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "floor")
 
-libc_math_function(
-    name = "floorf",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "floorf")
 
 libc_math_function(name = "floorl")
 
@@ -2639,19 +2614,9 @@ libc_math_function(name = "rintl")
 
 libc_math_function(name = "rintf128")
 
-libc_math_function(
-    name = "round",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "round")
 
-libc_math_function(
-    name = "roundf",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "roundf")
 
 libc_math_function(name = "roundl")
 
@@ -2850,19 +2815,9 @@ libc_math_function(name = "totalordermagl")
 
 libc_math_function(name = "totalordermagf128")
 
-libc_math_function(
-    name = "trunc",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "trunc")
 
-libc_math_function(
-    name = "truncf",
-    specializations = [
-        "generic",
-    ],
-)
+libc_math_function(name = "truncf")
 
 libc_math_function(name = "truncl")
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl
index ec3714407cb91..f298f817af83d 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl
@@ -129,7 +129,6 @@ def libc_function(
 
 def libc_math_function(
         name,
-        specializations = None,
         additional_deps = None):
     """Add a target for a math function.
 
@@ -142,14 +141,6 @@ def libc_math_function(
         math function.
     """
     additional_deps = additional_deps or []
-    specializations = specializations or ["generic"]
-    select_map = {}
-    if "generic" in specializations:
-        select_map["//conditions:default"] = ["src/math/generic/" + name + ".cpp"]
-    if "aarch64" in specializations:
-        select_map[PLATFORM_CPU_ARM64] = ["src/math/aarch64/" + name + ".cpp"]
-    if "x86_64" in specializations:
-        select_map[PLATFORM_CPU_X86_64] = ["src/math/x86_64/" + name + ".cpp"]
 
     #TODO(michaelrj): Fix the floating point dependencies
     OLD_FPUTIL_DEPS = [
@@ -166,7 +157,7 @@ def libc_math_function(
     ]
     libc_function(
         name = name,
-        srcs = selects.with_or(select_map),
+        srcs = ["src/math/generic/" + name + ".cpp"],
        hdrs = ["src/math/" + name + ".h"],
        deps = [":__support_common"] + OLD_FPUTIL_DEPS + additional_deps,
     )
 
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel
index 865f5e6f49617..f7bce45d07da6 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel
@@ -2,7 +2,7 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# Tests for LLVM libc string.h functions.
+# Tests for LLVM libc socket.h functions.
 
 load("//libc/test:libc_test_rules.bzl", "libc_test")
 
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index bbb0435837e41..62f1c2a50acf7 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -956,6 +956,9 @@ cc_library(
     ) + [
         "include/llvm-c/Comdat.h",
         "include/llvm-c/DebugInfo.h",
+        "include/llvm/Analysis/ValueTracking.h",
+        "include/llvm/Analysis/SimplifyQuery.h",
+        "include/llvm/Analysis/WithCache.h",
     ] + [":llvm_intrinsics_headers"],
     copts = llvm_copts,
     textual_hdrs = glob(["include/llvm/IR/*.def"]),
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 304bee99b323f..f5437245e8e13 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -12974,6 +12974,7 @@ cc_library(
         ":ControlFlowInterfaces",
         ":ConvertToLLVMInterface",
         ":DestinationStyleOpInterface",
+        ":FunctionInterfaces",
         ":IR",
         ":InferIntRangeCommon",
         ":InferIntRangeInterface",