diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a300e5b2b1dab..813d825f8b570 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, Fragments.insert(BF); for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); - const auto &FragmentProbes = - llvm::make_range(ProbeMap.lower_bound(FuncAddr), - ProbeMap.lower_bound(FuncAddr + F->getSize())); - for (const auto &[OutputAddress, Probes] : FragmentProbes) { + for (const MCDecodedPseudoProbe &Probe : + ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) { + const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); const unsigned BlockIndex = getBlock(InputOffset).second; - for (const MCDecodedPseudoProbe &Probe : Probes) - YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( - yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), - Probe.getType()}); + YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( + yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), + Probe.getType()}); } } } diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 84777741d611a..f74cf60e076d0 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const uint64_t FuncAddr = BF.getAddress(); const std::pair &BlockRange = BB->getInputAddressRange(); - const auto &BlockProbes = - llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first), - ProbeMap.lower_bound(FuncAddr + BlockRange.second)); - for (const auto &[_, Probes] : BlockProbes) - for (const MCDecodedPseudoProbe &Probe : Probes) - YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ - Probe.getGuid(), Probe.getIndex(), Probe.getType()}); + for (const MCDecodedPseudoProbe &Probe : ProbeMap.find( + FuncAddr + BlockRange.first, FuncAddr + BlockRange.second)) + YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ + Probe.getGuid(), Probe.getIndex(), Probe.getType()}); } YamlBF.Blocks.emplace_back(YamlBB); diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 886bbdbf9d686..4925b4b385d9b 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -143,7 +143,6 @@ void PseudoProbeRewriter::parsePseudoProbe() { if (!ProbeDecoder.buildAddress2ProbeMap( reinterpret_cast(Contents.data()), Contents.size(), GuidFilter, FuncStartAddrs)) { - ProbeDecoder.getAddress2ProbesMap().clear(); errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; return; } @@ -156,7 +155,8 @@ void PseudoProbeRewriter::parsePseudoProbe() { ProbeDecoder.printProbesForAllAddresses(outs()); } - for (const auto &[GUID, FuncDesc] : ProbeDecoder.getGUID2FuncDescMap()) { + for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) { + uint64_t GUID = FuncDesc.FuncGUID; if (!FuncStartAddrs.contains(GUID)) continue; BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]); @@ -174,13 +174,13 @@ void PseudoProbeRewriter::updatePseudoProbes() { AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap(); const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); - for (auto &AP : Address2ProbesMap) { - 
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first); + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + uint64_t Address = Probe.getAddress(); + BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address); // If F is removed, eliminate all probes inside it from inline tree // Setting probes' addresses as INT64_MAX means elimination if (!F) { - for (MCDecodedPseudoProbe &Probe : AP.second) - Probe.setAddress(INT64_MAX); + Probe.setAddress(INT64_MAX); continue; } // If F is not emitted, the function will remain in the same address as its @@ -188,45 +188,36 @@ void PseudoProbeRewriter::updatePseudoProbes() { if (!F->isEmitted()) continue; - uint64_t Offset = AP.first - F->getAddress(); + uint64_t Offset = Address - F->getAddress(); const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; // Check if block output address is defined. // If not, such block is removed from binary. Then remove the probes from // inline tree if (BlkOutputAddress == 0) { - for (MCDecodedPseudoProbe &Probe : AP.second) - Probe.setAddress(INT64_MAX); + Probe.setAddress(INT64_MAX); continue; } - unsigned ProbeTrack = AP.second.size(); - std::list::iterator Probe = AP.second.begin(); - while (ProbeTrack != 0) { - if (Probe->isBlock()) { - Probe->setAddress(BlkOutputAddress); - } else if (Probe->isCall()) { - // A call probe may be duplicated due to ICP - // Go through output of InputOffsetToAddressMap to collect all related - // probes - auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first); - auto CallOutputAddress = CallOutputAddresses.first; - if (CallOutputAddress == CallOutputAddresses.second) { - Probe->setAddress(INT64_MAX); - } else { - Probe->setAddress(CallOutputAddress->second); - CallOutputAddress = std::next(CallOutputAddress); - } - - while (CallOutputAddress != CallOutputAddresses.second) { - AP.second.push_back(*Probe); - AP.second.back().setAddress(CallOutputAddress->second); - Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); - CallOutputAddress = std::next(CallOutputAddress); - } + if (Probe.isBlock()) { + Probe.setAddress(BlkOutputAddress); + } else if (Probe.isCall()) { + // A call probe may be duplicated due to ICP + // Go through output of InputOffsetToAddressMap to collect all related + // probes + auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address); + auto CallOutputAddress = CallOutputAddresses.first; + if (CallOutputAddress == CallOutputAddresses.second) { + Probe.setAddress(INT64_MAX); + } else { + Probe.setAddress(CallOutputAddress->second); + CallOutputAddress = std::next(CallOutputAddress); + } + + while (CallOutputAddress != CallOutputAddresses.second) { + ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second); + CallOutputAddress = std::next(CallOutputAddress); } - Probe = std::next(Probe); - ProbeTrack--; } } @@ -242,22 +233,16 @@ void PseudoProbeRewriter::updatePseudoProbes() { BinaryBlock.getName(); // scan all addresses -> correlate probe to block when print out - std::vector Addresses; - for (auto &Entry : Address2ProbesMap) - Addresses.push_back(Entry.first); - llvm::sort(Addresses); - for (uint64_t Key : Addresses) { - for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { - if (Probe.getAddress() == INT64_MAX) - outs() << "Deleted Probe: "; - else - outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; - Probe.print(outs(), GUID2Func, true); - // print block name only if the probe is block type and 
undeleted. - if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) - outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " - << Addr2BlockNames[Probe.getAddress()] << "\n"; - } + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + if (Probe.getAddress() == INT64_MAX) + outs() << "Deleted Probe: "; + else + outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; + Probe.print(outs(), GUID2Func, true); + // print block name only if the probe is block type and undeleted. + if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) + outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " + << Addr2BlockNames[Probe.getAddress()] << "\n"; } outs() << "=======================================\n"; } @@ -333,7 +318,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { ProbeDecoder.getDummyInlineRoot(); for (auto Child = Root.getChildren().begin(); Child != Root.getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (auto Inlinee : Inlinees) // INT64_MAX is "placeholder" of unused callsite index field in the pair @@ -359,25 +344,37 @@ void PseudoProbeRewriter::encodePseudoProbes() { EmitInt(Cur->Guid, 8); // Emit number of probes in this node uint64_t Deleted = 0; - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) if (Probe->getAddress() == INT64_MAX) Deleted++; LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); - uint64_t ProbesSize = Cur->getProbes().size() - Deleted; + size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur); + uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes; EmitULEB128IntValue(ProbesSize); // Emit number of direct inlinees EmitULEB128IntValue(Cur->getChildren().size()); // Emit probes in this group - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) { if (Probe->getAddress() == INT64_MAX) continue; EmitDecodedPseudoProbe(Probe); LastProbe = Probe; } + if (InjectedProbes) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) { + if (Probe->getAddress() == INT64_MAX) + continue; + EmitDecodedPseudoProbe(Probe); + LastProbe = Probe; + } + } for (auto Child = Cur->getChildren().begin(); Child != Cur->getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (const auto &Inlinee : Inlinees) { assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid"); NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp index aa115cd450c4f..968a4a55a6d79 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp @@ -66,7 +66,8 @@ bool isUnaryLogicalNotOperator(const Stmt *Statement) { void fixGenericExprCastToBool(DiagnosticBuilder &Diag, const ImplicitCastExpr *Cast, const Stmt *Parent, - ASTContext &Context) { + ASTContext &Context, + bool UseUpperCaseLiteralSuffix) { // In case of expressions like (! integer), we should remove the redundant not // operator and use inverted comparison (integer == 0). 
bool InvertComparison = @@ -112,9 +113,14 @@ void fixGenericExprCastToBool(DiagnosticBuilder &Diag, EndLocInsertion += " != "; } - EndLocInsertion += getZeroLiteralToCompareWithForType( + const StringRef ZeroLiteral = getZeroLiteralToCompareWithForType( Cast->getCastKind(), SubExpr->getType(), Context); + if (UseUpperCaseLiteralSuffix) + EndLocInsertion += ZeroLiteral.upper(); + else + EndLocInsertion += ZeroLiteral; + if (NeedOuterParens) { EndLocInsertion += ")"; } @@ -248,12 +254,15 @@ ImplicitBoolConversionCheck::ImplicitBoolConversionCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), AllowIntegerConditions(Options.get("AllowIntegerConditions", false)), - AllowPointerConditions(Options.get("AllowPointerConditions", false)) {} + AllowPointerConditions(Options.get("AllowPointerConditions", false)), + UseUpperCaseLiteralSuffix( + Options.get("UseUpperCaseLiteralSuffix", false)) {} void ImplicitBoolConversionCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "AllowIntegerConditions", AllowIntegerConditions); Options.store(Opts, "AllowPointerConditions", AllowPointerConditions); + Options.store(Opts, "UseUpperCaseLiteralSuffix", UseUpperCaseLiteralSuffix); } void ImplicitBoolConversionCheck::registerMatchers(MatchFinder *Finder) { @@ -378,7 +387,8 @@ void ImplicitBoolConversionCheck::handleCastToBool(const ImplicitCastExpr *Cast, if (!EquivalentLiteral.empty()) { Diag << tooling::fixit::createReplacement(*Cast, EquivalentLiteral); } else { - fixGenericExprCastToBool(Diag, Cast, Parent, Context); + fixGenericExprCastToBool(Diag, Cast, Parent, Context, + UseUpperCaseLiteralSuffix); } } @@ -392,8 +402,16 @@ void ImplicitBoolConversionCheck::handleCastFromBool( if (const auto *BoolLiteral = dyn_cast(Cast->getSubExpr()->IgnoreParens())) { - Diag << tooling::fixit::createReplacement( - *Cast, getEquivalentForBoolLiteral(BoolLiteral, DestType, Context)); + + const auto EquivalentForBoolLiteral = + getEquivalentForBoolLiteral(BoolLiteral, DestType, Context); + if (UseUpperCaseLiteralSuffix) + Diag << tooling::fixit::createReplacement( + *Cast, EquivalentForBoolLiteral.upper()); + else + Diag << tooling::fixit::createReplacement(*Cast, + EquivalentForBoolLiteral); + } else { fixGenericExprCastFromBool(Diag, Cast, Context, DestType.getAsString()); } diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h index 9defec91e2f78..5947f7316e67c 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h @@ -36,6 +36,7 @@ class ImplicitBoolConversionCheck : public ClangTidyCheck { const bool AllowIntegerConditions; const bool AllowPointerConditions; + const bool UseUpperCaseLiteralSuffix; }; } // namespace clang::tidy::readability diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 1b025e8f90f7b..b001a6ad44669 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -112,6 +112,11 @@ Changes in existing checks ` check to support replacing member function calls too. +- Improved :doc:`readability-implicit-bool-conversion + ` check + by adding the option `UseUpperCaseLiteralSuffix` to select the + case of the literal suffix in fixes. + - Improved :doc:`readability-redundant-smartptr-get ` check to remove `->`, when redundant `get()` is removed. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/implicit-bool-conversion.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/implicit-bool-conversion.rst index 1ab21ffeb4228..88cff387f4c16 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/implicit-bool-conversion.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/implicit-bool-conversion.rst @@ -133,3 +133,17 @@ Options When `true`, the check will allow conditional pointer conversions. Default is `false`. + +.. option:: UseUpperCaseLiteralSuffix + + When `true`, the replacements will use an uppercase literal suffix in the + provided fixes. Default is `false`. + + Example + + .. code-block:: c++ + + uint32_t foo; + if (foo) {} + // ^ propose replacement default: if (foo != 0u) {} + // ^ propose replacement with option `UseUpperCaseLiteralSuffix`: if (foo != 0U) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c index a8c69858f76b6..f3dc32c10d640 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c @@ -1,4 +1,8 @@ // RUN: %check_clang_tidy %s readability-implicit-bool-conversion %t -- -- -std=c23 +// RUN: %check_clang_tidy -check-suffix=UPPER-CASE %s readability-implicit-bool-conversion %t -- \ +// RUN: -config='{CheckOptions: { \ +// RUN: readability-implicit-bool-conversion.UseUpperCaseLiteralSuffix: true \ +// RUN: }}' -- -std=c23 #undef NULL #define NULL 0L @@ -95,6 +99,7 @@ void implicitConversionFromBoolLiterals() { functionTakingUnsignedLong(false); // CHECK-MESSAGES: :[[@LINE-1]]:30: warning: implicit conversion 'bool' -> 'unsigned long' // CHECK-FIXES: functionTakingUnsignedLong(0u); + // CHECK-FIXES-UPPER-CASE: functionTakingUnsignedLong(0U); functionTakingSignedChar(true); // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: implicit conversion 'bool' -> 'signed char' @@ -103,6 +108,7 @@ void implicitConversionFromBoolLiterals() { functionTakingFloat(false); // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: implicit conversion 'bool' -> 'float' // CHECK-FIXES: functionTakingFloat(0.0f); + // CHECK-FIXES-UPPER-CASE: functionTakingFloat(0.0F); functionTakingDouble(true); // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'bool' -> 'double' @@ -160,11 +166,13 @@ void implicitConversionToBoolSimpleCases() { functionTakingBool(unsignedLong); // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: implicit conversion 'unsigned long' -> 'bool' // CHECK-FIXES: functionTakingBool(unsignedLong != 0u); + // CHECK-FIXES-UPPER-CASE: functionTakingBool(unsignedLong != 0U); float floating = 0.0f; functionTakingBool(floating); // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: functionTakingBool(floating != 0.0f); + // CHECK-FIXES-UPPER-CASE: functionTakingBool(floating != 0.0F); double doubleFloating = 1.0f; functionTakingBool(doubleFloating); @@ -194,6 +202,7 @@ void implicitConversionToBoolInSingleExpressions() { boolComingFromFloat = floating; // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: boolComingFromFloat = (floating != 0.0f); + // CHECK-FIXES-UPPER-CASE: boolComingFromFloat = (floating != 0.0F); signed char character = 'a'; bool boolComingFromChar; @@ -288,6 +297,7 @@ void implicitConversionToBoolFromUnaryMinusAndZeroLiterals() { 
functionTakingBool(-0.0f); // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: functionTakingBool((-0.0f) != 0.0f); + // CHECK-FIXES-UPPER-CASE: functionTakingBool((-0.0f) != 0.0F); functionTakingBool(-0.0); // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: implicit conversion 'double' -> 'bool' diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp index d6e7dcc4d8867..c4b7a77b92f0a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp @@ -1,4 +1,8 @@ // RUN: %check_clang_tidy %s readability-implicit-bool-conversion %t +// RUN: %check_clang_tidy -check-suffix=UPPER-CASE %s readability-implicit-bool-conversion %t -- \ +// RUN: -config='{CheckOptions: { \ +// RUN: readability-implicit-bool-conversion.UseUpperCaseLiteralSuffix: true \ +// RUN: }}' // We need NULL macro, but some buildbots don't like including header // This is a portable way of getting it to work @@ -99,6 +103,7 @@ void implicitConversionFromBoolLiterals() { functionTaking(false); // CHECK-MESSAGES: :[[@LINE-1]]:33: warning: implicit conversion 'bool' -> 'unsigned long' // CHECK-FIXES: functionTaking(0u); + // CHECK-FIXES-UPPER-CASE: functionTaking(0U); functionTaking(true); // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: implicit conversion 'bool' -> 'signed char' @@ -107,6 +112,7 @@ void implicitConversionFromBoolLiterals() { functionTaking(false); // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: implicit conversion 'bool' -> 'float' // CHECK-FIXES: functionTaking(0.0f); + // CHECK-FIXES-UPPER-CASE: functionTaking(0.0F); functionTaking(true); // CHECK-MESSAGES: :[[@LINE-1]]:26: warning: implicit conversion 'bool' -> 'double' @@ -178,11 +184,13 @@ void implicitConversionToBoolSimpleCases() { functionTaking(unsignedLong); // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'unsigned long' -> 'bool' // CHECK-FIXES: functionTaking(unsignedLong != 0u); + // CHECK-FIXES-UPPER-CASE: functionTaking(unsignedLong != 0U); float floating = 0.0f; functionTaking(floating); // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: functionTaking(floating != 0.0f); + // CHECK-FIXES-UPPER-CASE: functionTaking(floating != 0.0F); double doubleFloating = 1.0f; functionTaking(doubleFloating); @@ -215,6 +223,7 @@ void implicitConversionToBoolInSingleExpressions() { bool boolComingFromFloat = floating; // CHECK-MESSAGES: :[[@LINE-1]]:30: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: bool boolComingFromFloat = floating != 0.0f; + // CHECK-FIXES-UPPER-CASE: bool boolComingFromFloat = floating != 0.0F; signed char character = 'a'; bool boolComingFromChar = character; @@ -240,6 +249,7 @@ void implicitConversionToBoolInComplexExpressions() { bool boolComingFromFloating = floating - 0.3f || boolean; // CHECK-MESSAGES: :[[@LINE-1]]:33: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: bool boolComingFromFloating = ((floating - 0.3f) != 0.0f) || boolean; + // CHECK-FIXES-UPPER-CASE: bool boolComingFromFloating = ((floating - 0.3f) != 0.0F) || boolean; double doubleFloating = 0.3; bool boolComingFromDoubleFloating = (doubleFloating - 0.4) && boolean; @@ -257,6 +267,7 @@ void implicitConversionInNegationExpressions() { bool boolComingFromNegatedFloat = ! 
floating; // CHECK-MESSAGES: :[[@LINE-1]]:39: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: bool boolComingFromNegatedFloat = floating == 0.0f; + // CHECK-FIXES-UPPER-CASE: bool boolComingFromNegatedFloat = floating == 0.0F; signed char character = 'a'; bool boolComingFromNegatedChar = (! character); @@ -284,6 +295,7 @@ void implicitConversionToBoolInControlStatements() { while (floating) {} // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: while (floating != 0.0f) {} + // CHECK-FIXES-UPPER-CASE: while (floating != 0.0F) {} double doubleFloating = 0.4; do {} while (doubleFloating); @@ -296,6 +308,7 @@ bool implicitConversionToBoolInReturnValue() { return floating; // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: return floating != 0.0f; + // CHECK-FIXES-UPPER-CASE: return floating != 0.0F; } void implicitConversionToBoolFromLiterals() { @@ -355,6 +368,7 @@ void implicitConversionToBoolFromUnaryMinusAndZeroLiterals() { functionTaking(-0.0f); // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'float' -> 'bool' // CHECK-FIXES: functionTaking((-0.0f) != 0.0f); + // CHECK-FIXES-UPPER-CASE: functionTaking((-0.0f) != 0.0F); functionTaking(-0.0); // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'double' -> 'bool' diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2c6c7e083b9c9..2c29d49ba20f0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -173,6 +173,13 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ +- The ``-fc++-static-destructors={all,thread-local,none}`` flag was + added to control which C++ variables have static destructors + registered: all (the default) does so for all variables, thread-local + only for thread-local variables, and none (which corresponds to the + existing ``-fno-c++-static-destructors`` flag) skips all static + destructors registration. + Deprecated Compiler Flags ------------------------- @@ -309,6 +316,8 @@ Bug Fixes to C++ Support template depth than the friend function template. (#GH98258) - Clang now rebuilds the template parameters of out-of-line declarations and specializations in the context of the current instantiation in all cases. +- Fix evaluation of the index of dependent pack indexing expressions/types specifiers (#GH105900) +- Correctly handle subexpressions of an immediate invocation in the presence of implicit casts. (#GH105558) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 0bfbc995579d4..89a1018e14c0e 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -571,7 +571,7 @@ if ((y = make_int())) { nullability ^^^^^^^^^^^ -Objective C checkers that warn for null pointer passing and dereferencing errors. +Checkers (mostly Objective C) that warn for null pointer passing and dereferencing errors. .. _nullability-NullPassedToNonnull: @@ -588,8 +588,8 @@ Warns when a null pointer is passed to a pointer which has a _Nonnull type. .. _nullability-NullReturnedFromNonnull: -nullability.NullReturnedFromNonnull (ObjC) -"""""""""""""""""""""""""""""""""""""""""" +nullability.NullReturnedFromNonnull (C, C++, ObjC) +"""""""""""""""""""""""""""""""""""""""""""""""""" Warns when a null pointer is returned from a function that has _Nonnull return type. .. 
code-block:: objc @@ -604,6 +604,22 @@ Warns when a null pointer is returned from a function that has _Nonnull return t return result; } +Warns when a null pointer is returned from a function annotated with ``__attribute__((returns_nonnull))`` + +.. code-block:: cpp + + int global; + __attribute__((returns_nonnull)) void* getPtr(void* p); + + void* getPtr(void* p) { + if (p) { // forgot to negate the condition + return &global; + } + // Warning: nullptr returned from a function that is expected + // to return a non-null value + return p; + } + .. _nullability-NullableDereferenced: nullability.NullableDereferenced (ObjC) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index df36a2163b9f0..c2b9d7cb93c30 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1592,6 +1592,12 @@ succeeds but Clang emits a warning specifying that the function is deprecated. Finally, if Clang is instructed to compile code for macOS 10.7, the call fails because ``f()`` is no longer available. +Clang is instructed to compile code for a minimum deployment version using +the ``-target`` or ``-mtargetos`` command line arguments. For example, +macOS 10.7 would be specified as ``-target x86_64-apple-macos10.7`` or +``-mtargetos=macos10.7``. Variants like Mac Catalyst are specified as +``-target arm64-apple-ios15.0-macabi`` or ``-mtargetos=ios15.0-macabi`` + The availability attribute is a comma-separated list starting with the platform name and then including clauses specifying important milestones in the declaration's lifetime (in any order) along with additional information. Those @@ -1636,41 +1642,61 @@ the implicitly inferred availability attributes. If no availability attribute specifies availability for the current target platform, the availability attributes are ignored. Supported platforms are: -``ios`` - Apple's iOS operating system. The minimum deployment target is specified - as part of the ``-target *arch*-apple-ios*version*`` command line argument. - Alternatively, it can be specified by the ``-mtargetos=ios*version*`` - command-line argument. +``iOS`` +``macOS`` +``tvOS`` +``watchOS`` +``iOSApplicationExtension`` +``macOSApplicationExtension`` +``tvOSApplicationExtension`` +``watchOSApplicationExtension`` +``macCatalyst`` +``macCatalystApplicationExtension`` +``visionOS`` +``visionOSApplicationExtension`` +``driverkit`` +``swift`` +``android`` +``fuchsia`` +``ohos`` +``zos`` +``ShaderModel`` -``macos`` - Apple's macOS operating system. The minimum deployment target is specified - as part of the ``-target *arch*-apple-macos*version*`` command line argument. - Alternatively, it can be specified by the ``-mtargetos=macos*version*`` - command-line argument. ``macosx`` is supported for - backward-compatibility reasons, but it is deprecated. +Some platforms have alias names: +``ios`` +``macos`` +``macosx (deprecated)`` ``tvos`` - Apple's tvOS operating system. The minimum deployment target is specified - as part of the ``-target *arch*-apple-tvos*version*`` command line argument. - Alternatively, it can be specified by the ``-mtargetos=tvos*version*`` - command-line argument. - ``watchos`` - Apple's watchOS operating system. The minimum deployment target is specified - as part of the ``-target *arch*-apple-watchos*version*`` command line argument. - Alternatively, it can be specified by the ``-mtargetos=watchos*version*`` - command-line argument. 
- +``ios_app_extension`` +``macos_app_extension`` +``macosx_app_extension (deprecated)`` +``tvos_app_extension`` +``watchos_app_extension`` +``maccatalyst`` +``maccatalyst_app_extension`` ``visionos`` - Apple's visionOS operating system. The minimum deployment target is specified - as part of the ``-target *arch*-apple-visionos*version*`` command line argument. - Alternatively, it can be specified by the ``-mtargetos=visionos*version*`` - command-line argument. - -``driverkit`` - Apple's DriverKit userspace kernel extensions. The minimum deployment target - is specified as part of the ``-target *arch*-apple-driverkit*version*`` - command line argument. +``visionos_app_extension`` +``shadermodel`` + +Supported environment names for the ShaderModel platform: + +``pixel`` +``vertex`` +``geometry`` +``hull`` +``domain`` +``compute`` +``raygeneration`` +``intersection`` +``anyhit`` +``closesthit`` +``miss`` +``callable`` +``mesh`` +``amplification`` +``library`` A declaration can typically be used even when deploying back to a platform version prior to when the declaration was introduced. When this happens, the diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 956d9a2d2434c..fd3346d29f26a 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -464,7 +464,9 @@ LANGOPT(FixedPoint, 1, 0, "fixed point types") LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0, "unsigned fixed point types having one extra padding bit") -LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors") +ENUM_LANGOPT(RegisterStaticDestructors, RegisterStaticDestructorsKind, 2, + RegisterStaticDestructorsKind::All, + "Register C++ static destructors") LANGOPT(RegCall4, 1, 0, "Set __regcall4 as a default calling convention to respect __regcall ABI v.4") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 1c80ee89837cb..51a34686ad7e1 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -458,6 +458,16 @@ class LangOptionsBase { CX_None }; + /// Controls which variables have static destructors registered. + enum class RegisterStaticDestructorsKind { + /// Register static destructors for all variables. + All, + /// Register static destructors only for thread-local variables. + ThreadLocal, + /// Don't register static destructors for any variables. + None, + }; + // Define simple language options (with no accessors). #define LANGOPT(Name, Bits, Default, Description) unsigned Name : Bits; #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index d683106bb0e29..212c1f6ff3a12 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -660,6 +660,9 @@ KEYWORD(out , KEYHLSL) #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) KEYWORD(Name, KEYHLSL) #include "clang/Basic/HLSLIntangibleTypes.def" +// HLSL Type traits. 
+TYPE_TRAIT_2(__builtin_hlsl_is_scalarized_layout_compatible, IsScalarizedLayoutCompatible, KEYHLSL) + // OpenMP Type Traits UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 84eadd42880a5..9177d56718ee7 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -379,8 +379,7 @@ class Driver { /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. - static std::string GetResourcesPath(StringRef BinaryPath, - StringRef CustomResourceDir = ""); + static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, DiagnosticsEngine &Diags, std::string Title = "clang LLVM compiler", diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1b9b3f2c6600a..83cf753e82484 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2302,11 +2302,18 @@ defm fixed_point : BoolFOption<"fixed-point", PosFlag, NegFlag, BothFlags<[], [ClangOption], " fixed point types">>; -defm cxx_static_destructors : BoolFOption<"c++-static-destructors", - LangOpts<"RegisterStaticDestructors">, DefaultTrue, - NegFlag, - PosFlag>; +def cxx_static_destructors_EQ : Joined<["-"], "fc++-static-destructors=">, Group, + HelpText<"Controls which variables C++ static destructors are registered for">, + Values<"all,thread-local,none">, + NormalizedValues<["All", "ThreadLocal", "None"]>, + NormalizedValuesScope<"LangOptions::RegisterStaticDestructorsKind">, + MarshallingInfoEnum, "All">, + Visibility<[ClangOption, CC1Option]>; +def cxx_static_destructors : Flag<["-"], "fc++-static-destructors">, Group, + Alias, AliasArgs<["all"]>; +def no_cxx_static_destructors : Flag<["-"], "fno-c++-static-destructors">, Group, + Alias, AliasArgs<["none"]>, + HelpText<"Disable C++ static destructor registration">; def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Visibility<[ClangOption, CC1Option]>, MarshallingInfoString>; diff --git a/clang/include/clang/ExtractAPI/DeclarationFragments.h b/clang/include/clang/ExtractAPI/DeclarationFragments.h index 535da90b98284..4ac744459031e 100644 --- a/clang/include/clang/ExtractAPI/DeclarationFragments.h +++ b/clang/include/clang/ExtractAPI/DeclarationFragments.h @@ -411,9 +411,9 @@ class DeclarationFragmentsBuilder { /// Build DeclarationFragments for a macro. /// /// \param Name name of the macro. - /// \param MD the associated MacroDirective. + /// \param MI the associated MacroInfo. static DeclarationFragments getFragmentsForMacro(StringRef Name, - const MacroDirective *MD); + const MacroInfo *MI); /// Build DeclarationFragments for a typedef \p TypedefNameDecl. 
static DeclarationFragments diff --git a/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h b/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h index 67659f5a25037..b09b8b44d9aba 100644 --- a/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h +++ b/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h @@ -213,7 +213,7 @@ class ExtractAPIVisitorBase : public RecursiveASTVisitor { StringRef getOwningModuleName(const Decl &D) { if (auto *OwningModule = D.getImportedOwningModule()) - return OwningModule->Name; + return OwningModule->getTopLevelModule()->Name; return {}; } diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 3aae3383c215b..5277fb57a2334 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -61,6 +61,9 @@ class SemaHLSL : public SemaBase { void handleParamModifierAttr(Decl *D, const ParsedAttr &AL); bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + + // HLSL Type trait implementations + bool IsScalarizedLayoutCompatible(QualType T1, QualType T2) const; }; } // namespace clang diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index 53ec8f52d4921..3b9e5f9f9f69c 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -219,7 +219,7 @@ bool EvalEmitter::emitRetValue(const SourceInfo &Info) { return false; if (std::optional APV = - Ptr.toRValue(S.getCtx(), EvalResult.getSourceType())) { + Ptr.toRValue(S.getASTContext(), EvalResult.getSourceType())) { EvalResult.setValue(*APV); return true; } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index aea303f0e630c..09d3f4525138e 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -326,7 +326,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { auto IsConstType = [&S](const VarDecl *VD) -> bool { QualType T = VD->getType(); - if (T.isConstant(S.getCtx())) + if (T.isConstant(S.getASTContext())) return true; if (S.getLangOpts().CPlusPlus && !S.getLangOpts().CPlusPlus11) @@ -523,9 +523,9 @@ bool CheckGlobalInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { assert(S.getLangOpts().CPlusPlus); const auto *VD = cast(Ptr.getDeclDesc()->asValueDecl()); if ((!VD->hasConstantInitialization() && - VD->mightBeUsableInConstantExpressions(S.getCtx())) || + VD->mightBeUsableInConstantExpressions(S.getASTContext())) || (S.getLangOpts().OpenCL && !S.getLangOpts().CPlusPlus11 && - !VD->hasICEInitializer(S.getCtx()))) { + !VD->hasICEInitializer(S.getASTContext()))) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); @@ -797,7 +797,7 @@ bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray, // but we want to get the array size right. if (D->isArray()) { QualType ElemQT = D->getType()->getPointeeType(); - TypeToDiagnose = S.getCtx().getConstantArrayType( + TypeToDiagnose = S.getASTContext().getConstantArrayType( ElemQT, APInt(64, static_cast(D->getNumElems()), false), nullptr, ArraySizeModifier::Normal, 0); } else @@ -819,7 +819,7 @@ bool CheckDeleteSource(InterpState &S, CodePtr OpPC, const Expr *Source, // Whatever this is, we didn't heap allocate it. 
const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_not_heap_alloc) - << Ptr.toDiagnosticString(S.getCtx()); + << Ptr.toDiagnosticString(S.getASTContext()); if (Ptr.isTemporary()) S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 81c547991c3d7..242532a3f0544 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -41,7 +41,7 @@ using APSInt = llvm::APSInt; /// Convert a value to an APValue. template bool ReturnValue(const InterpState &S, const T &V, APValue &R) { - R = V.toAPValue(S.getCtx()); + R = V.toAPValue(S.getASTContext()); return true; } @@ -231,12 +231,12 @@ bool CheckArraySize(InterpState &S, CodePtr OpPC, SizeT *NumElements, // constructing the array, we catch this here. SizeT MaxElements = SizeT::from(Descriptor::MaxArrayElemBytes / ElemSize); if (NumElements->toAPSInt().getActiveBits() > - ConstantArrayType::getMaxSizeBits(S.getCtx()) || + ConstantArrayType::getMaxSizeBits(S.getASTContext()) || *NumElements > MaxElements) { if (!IsNoThrow) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_new_too_large) - << NumElements->toDiagnosticString(S.getCtx()); + << NumElements->toDiagnosticString(S.getASTContext()); } return false; } @@ -911,8 +911,8 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -927,7 +927,7 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, if (FP.isWeak()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) - << FP.toDiagnosticString(S.getCtx()); + << FP.toDiagnosticString(S.getASTContext()); return false; } } @@ -945,8 +945,8 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (!Pointer::hasSameBase(LHS, RHS)) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } else { unsigned VL = LHS.getByteOffset(); @@ -974,7 +974,7 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (P.isWeak()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) - << P.toDiagnosticString(S.getCtx()); + << P.toDiagnosticString(S.getASTContext()); return false; } } @@ -984,13 +984,13 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { RHS.getOffset() == 0) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end) - << LHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()); return false; } else if (RHS.isOnePastEnd() && !LHS.isOnePastEnd() && !LHS.isZero() && LHS.getOffset() == 0) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end) - << RHS.toDiagnosticString(S.getCtx()); + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -1073,8 +1073,8 @@ bool 
CMP3(InterpState &S, CodePtr OpPC, const ComparisonCategoryInfo *CmpInfo) { // This should only happen with pointers. const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -1342,7 +1342,7 @@ bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I, const Pointer &Ptr = S.P.getGlobal(I); const T Value = S.Stk.peek(); - APValue APV = Value.toAPValue(S.getCtx()); + APValue APV = Value.toAPValue(S.getASTContext()); APValue *Cached = Temp->getOrCreateValue(true); *Cached = APV; @@ -1369,7 +1369,7 @@ inline bool InitGlobalTempComp(InterpState &S, CodePtr OpPC, std::make_pair(P.getDeclDesc()->asExpr(), Temp)); if (std::optional APV = - P.toRValue(S.getCtx(), Temp->getTemporaryExpr()->getType())) { + P.toRValue(S.getASTContext(), Temp->getTemporaryExpr()->getType())) { *Cached = *APV; return true; } @@ -1404,7 +1404,8 @@ bool InitThisBitField(InterpState &S, CodePtr OpPC, const Record::Field *F, return false; const Pointer &Field = This.atField(FieldOffset); const auto &Value = S.Stk.pop(); - Field.deref() = Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.deref() = + Value.truncate(F->Decl->getBitWidthValue(S.getASTContext())); Field.initialize(); return true; } @@ -1427,7 +1428,8 @@ bool InitBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { assert(F->isBitField()); const T &Value = S.Stk.pop(); const Pointer &Field = S.Stk.peek().atField(F->Offset); - Field.deref() = Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.deref() = + Value.truncate(F->Decl->getBitWidthValue(S.getASTContext())); Field.activate(); Field.initialize(); return true; @@ -1477,7 +1479,7 @@ inline bool GetPtrField(InterpState &S, CodePtr OpPC, uint32_t Off) { return false; if (Ptr.isIntegralPointer()) { - S.Stk.push(Ptr.asIntPointer().atOffset(S.getCtx(), Off)); + S.Stk.push(Ptr.asIntPointer().atOffset(S.getASTContext(), Off)); return true; } @@ -1505,7 +1507,7 @@ inline bool GetPtrFieldPop(InterpState &S, CodePtr OpPC, uint32_t Off) { return false; if (Ptr.isIntegralPointer()) { - S.Stk.push(Ptr.asIntPointer().atOffset(S.getCtx(), Off)); + S.Stk.push(Ptr.asIntPointer().atOffset(S.getASTContext(), Off)); return true; } @@ -1721,7 +1723,7 @@ bool StoreBitField(InterpState &S, CodePtr OpPC) { if (Ptr.canBeInitialized()) Ptr.initialize(); if (const auto *FD = Ptr.getField()) - Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getASTContext())); else Ptr.deref() = Value; return true; @@ -1736,7 +1738,7 @@ bool StoreBitFieldPop(InterpState &S, CodePtr OpPC) { if (Ptr.canBeInitialized()) Ptr.initialize(); if (const auto *FD = Ptr.getField()) - Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getASTContext())); else Ptr.deref() = Value; return true; @@ -2014,7 +2016,7 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) { while (auto *AT = dyn_cast(PtrT)) PtrT = AT->getElementType(); - QualType ArrayTy = S.getCtx().getConstantArrayType( + QualType ArrayTy = S.getASTContext().getConstantArrayType( PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0); S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_pointer_subtraction_zero_size) @@ -2953,7 +2955,7 @@ inline bool 
CheckDecl(InterpState &S, CodePtr OpPC, const VarDecl *VD) { if (VD == S.EvaluatingDecl) return true; - if (!VD->isUsableInConstantExpressions(S.getCtx())) { + if (!VD->isUsableInConstantExpressions(S.getASTContext())) { S.CCEDiag(VD->getLocation(), diag::note_constexpr_static_local) << (VD->getTSCSpec() == TSCS_unspecified ? 0 : 1) << VD; return false; @@ -3047,7 +3049,7 @@ static inline bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm) { if (!Ptr.isRoot() || Ptr.isOnePastEnd() || Ptr.isArrayElement()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_subobject) - << Ptr.toDiagnosticString(S.getCtx()) << Ptr.isOnePastEnd(); + << Ptr.toDiagnosticString(S.getASTContext()) << Ptr.isOnePastEnd(); return false; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 26abf58205106..1a71bff25d254 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -38,7 +38,7 @@ static T getParam(const InterpFrame *Frame, unsigned Index) { } PrimType getIntPrimType(const InterpState &S) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); + const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned IntWidth = TI.getIntWidth(); if (IntWidth == 32) @@ -49,7 +49,7 @@ PrimType getIntPrimType(const InterpState &S) { } PrimType getLongPrimType(const InterpState &S) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); + const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned LongWidth = TI.getLongWidth(); if (LongWidth == 64) @@ -272,10 +272,10 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, return false; const llvm::fltSemantics &TargetSemantics = - S.getCtx().getFloatTypeSemantics(F->getDecl()->getReturnType()); + S.getASTContext().getFloatTypeSemantics(F->getDecl()->getReturnType()); Floating Result; - if (S.getCtx().getTargetInfo().isNan2008()) { + if (S.getASTContext().getTargetInfo().isNan2008()) { if (Signaling) Result = Floating( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); @@ -303,7 +303,7 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, static bool interp__builtin_inf(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *F) { const llvm::fltSemantics &TargetSemantics = - S.getCtx().getFloatTypeSemantics(F->getDecl()->getReturnType()); + S.getASTContext().getFloatTypeSemantics(F->getDecl()->getReturnType()); S.Stk.push(Floating::getInf(TargetSemantics)); return true; @@ -689,8 +689,8 @@ static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC, PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); APSInt Arg = peekToAPSInt(S.Stk, ArgT); - int Result = - S.getCtx().getTargetInfo().getEHDataRegisterNumber(Arg.getZExtValue()); + int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber( + Arg.getZExtValue()); pushInteger(S, Result, Call->getType()); return true; } @@ -734,7 +734,7 @@ static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC, ResultType->isSignedIntegerOrEnumerationType(); uint64_t LHSSize = LHS.getBitWidth(); uint64_t RHSSize = RHS.getBitWidth(); - uint64_t ResultSize = S.getCtx().getTypeSize(ResultType); + uint64_t ResultSize = S.getASTContext().getTypeSize(ResultType); uint64_t MaxBits = std::max(std::max(LHSSize, RHSSize), ResultSize); // Add an additional bit if the signedness isn't uniformly agreed to. 
We @@ -794,7 +794,7 @@ static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC, // since it will give us the behavior of a TruncOrSelf in the case where // its parameter <= its size. We previously set Result to be at least the // type-size of the result, so getTypeSize(ResultType) <= Resu - APSInt Temp = Result.extOrTrunc(S.getCtx().getTypeSize(ResultType)); + APSInt Temp = Result.extOrTrunc(S.getASTContext().getTypeSize(ResultType)); Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType()); if (!APSInt::isSameValue(Temp, Result)) @@ -974,8 +974,8 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, if (Size.isPowerOfTwo()) { // Check against inlining width. unsigned InlineWidthBits = - S.getCtx().getTargetInfo().getMaxAtomicInlineWidth(); - if (Size <= S.getCtx().toCharUnitsFromBits(InlineWidthBits)) { + S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth(); + if (Size <= S.getASTContext().toCharUnitsFromBits(InlineWidthBits)) { // OK, we will inline appropriately-aligned operations of this size, // and _Atomic(T) is appropriately-aligned. @@ -1007,7 +1007,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, if (auto PtrTy = PtrArg->getType()->getAs()) { QualType PointeeType = PtrTy->getPointeeType(); if (!PointeeType->isIncompleteType() && - S.getCtx().getTypeAlignInChars(PointeeType) >= Size) { + S.getASTContext().getTypeAlignInChars(PointeeType) >= Size) { // OK, we will inline operations on this object. return returnBool(true); } @@ -1059,7 +1059,7 @@ static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC, S.FFDiag(Call, diag::note_constexpr_invalid_alignment) << Alignment; return false; } - unsigned SrcWidth = S.getCtx().getIntWidth(Call->getArg(0)->getType()); + unsigned SrcWidth = S.getASTContext().getIntWidth(Call->getArg(0)->getType()); APSInt MaxValue(APInt::getOneBitSet(SrcWidth, SrcWidth - 1)); if (APSInt::compareValues(Alignment, MaxValue) > 0) { S.FFDiag(Call, diag::note_constexpr_alignment_too_big) @@ -1094,7 +1094,7 @@ static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC, unsigned PtrOffset = Ptr.getByteOffset(); PtrOffset = Ptr.getIndex(); CharUnits BaseAlignment = - S.getCtx().getDeclAlign(Ptr.getDeclDesc()->asValueDecl()); + S.getASTContext().getDeclAlign(Ptr.getDeclDesc()->asValueDecl()); CharUnits PtrAlign = BaseAlignment.alignmentAtOffset(CharUnits::fromQuantity(PtrOffset)); @@ -1157,7 +1157,7 @@ static bool interp__builtin_os_log_format_buffer_size(InterpState &S, const Function *Func, const CallExpr *Call) { analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(S.getCtx(), Call, Layout); + analyze_os_log::computeOSLogBufferLayout(S.getASTContext(), Call, Layout); pushInteger(S, Layout.size().getQuantity(), Call->getType()); return true; } @@ -1624,10 +1624,11 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, const RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; - const ASTRecordLayout &RL = S.getCtx().getASTRecordLayout(RD); + const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD); unsigned FieldIndex = MemberDecl->getFieldIndex(); assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type"); - Result += S.getCtx().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex)); + Result += + S.getASTContext().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex)); CurrentType = MemberDecl->getType().getNonReferenceType(); break; } @@ -1635,11 +1636,11 
@@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, // When generating bytecode, we put all the index expressions as Sint64 on // the stack. int64_t Index = ArrayIndices[ArrayIndex]; - const ArrayType *AT = S.getCtx().getAsArrayType(CurrentType); + const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; CurrentType = AT->getElementType(); - CharUnits ElementSize = S.getCtx().getTypeSizeInChars(CurrentType); + CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); Result += Index * ElementSize; ++ArrayIndex; break; @@ -1656,7 +1657,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, const RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; - const ASTRecordLayout &RL = S.getCtx().getASTRecordLayout(RD); + const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD); // Find the base class itself. CurrentType = BaseSpec->getType(); diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 8b55b61cbbfa7..5e98444ef05a5 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -179,7 +179,7 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { if (const auto *MCE = dyn_cast_if_present(CallExpr)) { const Expr *Object = MCE->getImplicitObjectArgument(); Object->printPretty(OS, /*Helper=*/nullptr, - S.getCtx().getPrintingPolicy(), + S.getASTContext().getPrintingPolicy(), /*Indentation=*/0); if (Object->getType()->isPointerType()) OS << "->"; @@ -188,18 +188,18 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { } else if (const auto *OCE = dyn_cast_if_present(CallExpr)) { OCE->getArg(0)->printPretty(OS, /*Helper=*/nullptr, - S.getCtx().getPrintingPolicy(), + S.getASTContext().getPrintingPolicy(), /*Indentation=*/0); OS << "."; } else if (const auto *M = dyn_cast(F)) { - print(OS, This, S.getCtx(), - S.getCtx().getLValueReferenceType( - S.getCtx().getRecordType(M->getParent()))); + print(OS, This, S.getASTContext(), + S.getASTContext().getLValueReferenceType( + S.getASTContext().getRecordType(M->getParent()))); OS << "."; } } - F->getNameForDiagnostic(OS, S.getCtx().getPrintingPolicy(), + F->getNameForDiagnostic(OS, S.getASTContext().getPrintingPolicy(), /*Qualified=*/false); OS << '('; unsigned Off = 0; @@ -212,7 +212,7 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { PrimType PrimTy = S.Ctx.classify(Ty).value_or(PT_Ptr); - TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getCtx(), Ty)); + TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getASTContext(), Ty)); Off += align(primSize(PrimTy)); if (I + 1 != N) OS << ", "; diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 61ee54331c65d..961ba5f5c28a0 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -59,7 +59,7 @@ class InterpState final : public State, public SourceMapper { Expr::EvalStatus &getEvalStatus() const override { return Parent.getEvalStatus(); } - ASTContext &getCtx() const override { return Parent.getCtx(); } + ASTContext &getASTContext() const override { return Parent.getASTContext(); } // Forward status checks and updates to the walker. 
bool checkingForUndefinedBehavior() const override { diff --git a/clang/lib/AST/ByteCode/State.cpp b/clang/lib/AST/ByteCode/State.cpp index 0d9dadec4b958..b4db86e8d22c7 100644 --- a/clang/lib/AST/ByteCode/State.cpp +++ b/clang/lib/AST/ByteCode/State.cpp @@ -74,12 +74,12 @@ void State::addNotes(ArrayRef Diags) { } DiagnosticBuilder State::report(SourceLocation Loc, diag::kind DiagId) { - return getCtx().getDiagnostics().Report(Loc, DiagId); + return getASTContext().getDiagnostics().Report(Loc, DiagId); } /// Add a diagnostic to the diagnostics list. PartialDiagnostic &State::addDiag(SourceLocation Loc, diag::kind DiagId) { - PartialDiagnostic PD(DiagId, getCtx().getDiagAllocator()); + PartialDiagnostic PD(DiagId, getASTContext().getDiagAllocator()); getEvalStatus().Diag->push_back(std::make_pair(Loc, PD)); return getEvalStatus().Diag->back().second; } @@ -93,7 +93,8 @@ OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, } unsigned CallStackNotes = getCallStackDepth() - 1; - unsigned Limit = getCtx().getDiagnostics().getConstexprBacktraceLimit(); + unsigned Limit = + getASTContext().getDiagnostics().getConstexprBacktraceLimit(); if (Limit) CallStackNotes = std::min(CallStackNotes, Limit + 1); if (checkingPotentialConstantExpression()) @@ -113,7 +114,9 @@ OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, return OptionalDiagnostic(); } -const LangOptions &State::getLangOpts() const { return getCtx().getLangOpts(); } +const LangOptions &State::getLangOpts() const { + return getASTContext().getLangOpts(); +} void State::addCallStack(unsigned Limit) { // Determine which calls to skip, if any. diff --git a/clang/lib/AST/ByteCode/State.h b/clang/lib/AST/ByteCode/State.h index 44d6c037c5ad9..2cffce4bc2ae4 100644 --- a/clang/lib/AST/ByteCode/State.h +++ b/clang/lib/AST/ByteCode/State.h @@ -67,7 +67,7 @@ class State { virtual void setActiveDiagnostic(bool Flag) = 0; virtual void setFoldFailureDiagnostic(bool Flag) = 0; virtual Expr::EvalStatus &getEvalStatus() const = 0; - virtual ASTContext &getCtx() const = 0; + virtual ASTContext &getASTContext() const = 0; virtual bool hasPriorDiagnostic() = 0; virtual unsigned getCallStackDepth() = 0; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 90caf81757ac9..1a07125815832 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2799,9 +2799,17 @@ bool VarDecl::isKnownToBeDefined() const { } bool VarDecl::isNoDestroy(const ASTContext &Ctx) const { - return hasGlobalStorage() && (hasAttr() || - (!Ctx.getLangOpts().RegisterStaticDestructors && - !hasAttr())); + if (!hasGlobalStorage()) + return false; + if (hasAttr()) + return true; + if (hasAttr()) + return false; + + using RSDKind = LangOptions::RegisterStaticDestructorsKind; + RSDKind K = Ctx.getLangOpts().getRegisterStaticDestructors(); + return K == RSDKind::None || + (K == RSDKind::ThreadLocal && getTLSKind() == TLS_None); } QualType::DestructionKind diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 826cc5f58bdf5..d46f57521a97d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1030,7 +1030,7 @@ namespace { discardCleanups(); } - ASTContext &getCtx() const override { return Ctx; } + ASTContext &getASTContext() const override { return Ctx; } void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value, EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) { @@ -2327,9 +2327,9 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, // In CUDA/HIP 
device compilation, only device side variables have // constant addresses. - if (Info.getCtx().getLangOpts().CUDA && - Info.getCtx().getLangOpts().CUDAIsDevice && - Info.getCtx().CUDAConstantEvalCtx.NoWrongSidedVars) { + if (Info.getASTContext().getLangOpts().CUDA && + Info.getASTContext().getLangOpts().CUDAIsDevice && + Info.getASTContext().CUDAConstantEvalCtx.NoWrongSidedVars) { if ((!Var->hasAttr() && !Var->hasAttr() && !Var->getType()->isCUDADeviceBuiltinSurfaceType() && @@ -5662,7 +5662,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, *Info.CurrentCall, hasSpecificAttr(AS->getAttrs()) && isa(SS)); - auto LO = Info.getCtx().getLangOpts(); + auto LO = Info.getASTContext().getLangOpts(); if (LO.CXXAssumptions && !LO.MSVCCompat) { for (auto *Attr : AS->getAttrs()) { auto *AA = dyn_cast(Attr); @@ -5673,7 +5673,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, if (Assumption->isValueDependent()) return ESR_Failed; - if (Assumption->HasSideEffects(Info.getCtx())) + if (Assumption->HasSideEffects(Info.getASTContext())) continue; bool Value; diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 08730a6a6672a..b8036cf6e6a30 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -14,6 +14,7 @@ #include "CodeGenTypeCache.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/GEPNoWrapFlags.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Type.h" @@ -334,9 +335,10 @@ class CGBuilderTy : public CGBuilderBaseTy { Address CreateGEP(Address Addr, ArrayRef IdxList, llvm::Type *ElementType, CharUnits Align, - const Twine &Name = "") { + const Twine &Name = "", + llvm::GEPNoWrapFlags NW = llvm::GEPNoWrapFlags::none()) { llvm::Value *Ptr = emitRawPointerFromAddress(Addr); - return RawAddress(CreateGEP(Addr.getElementType(), Ptr, IdxList, Name), + return RawAddress(CreateGEP(Addr.getElementType(), Ptr, IdxList, Name, NW), ElementType, Align); } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index af11bc20a3b63..5b55ec9d8064e 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/FixedPointBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GEPNoWrapFlags.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" @@ -5756,7 +5757,12 @@ CodeGenFunction::EmitCheckedInBoundsGEP(llvm::Type *ElemTy, Value *Ptr, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name) { llvm::Type *PtrTy = Ptr->getType(); - Value *GEPVal = Builder.CreateInBoundsGEP(ElemTy, Ptr, IdxList, Name); + + llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::inBounds(); + if (!SignedIndices && !IsSubtraction) + NWFlags |= llvm::GEPNoWrapFlags::noUnsignedWrap(); + + Value *GEPVal = Builder.CreateGEP(ElemTy, Ptr, IdxList, Name, NWFlags); // If the pointer overflow sanitizer isn't enabled, do nothing. 
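GEPNoWrapFlags values compose, so a call site can start from inBounds() and add nuw only when the index is provably non-negative, which is what both EmitCheckedInBoundsGEP overloads now do. A condensed sketch of the pattern:

// nuw is only sound when the index is unsigned and we are not lowering a
// pointer subtraction; otherwise keep plain inbounds.
llvm::GEPNoWrapFlags NW = llvm::GEPNoWrapFlags::inBounds();
if (!SignedIndices && !IsSubtraction)
  NW |= llvm::GEPNoWrapFlags::noUnsignedWrap();
llvm::Value *GEPVal = Builder.CreateGEP(ElemTy, Ptr, IdxList, "arrayidx", NW);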
if (!SanOpts.has(SanitizerKind::PointerOverflow)) @@ -5871,8 +5877,13 @@ Address CodeGenFunction::EmitCheckedInBoundsGEP( Address Addr, ArrayRef IdxList, llvm::Type *elementType, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, CharUnits Align, const Twine &Name) { - if (!SanOpts.has(SanitizerKind::PointerOverflow)) - return Builder.CreateInBoundsGEP(Addr, IdxList, elementType, Align, Name); + if (!SanOpts.has(SanitizerKind::PointerOverflow)) { + llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::inBounds(); + if (!SignedIndices && !IsSubtraction) + NWFlags |= llvm::GEPNoWrapFlags::noUnsignedWrap(); + + return Builder.CreateGEP(Addr, IdxList, elementType, Align, Name, NWFlags); + } return RawAddress( EmitCheckedInBoundsGEP(Addr.getElementType(), Addr.emitRawPointer(*this), diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e12416e51f8d2..43002add33774 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -171,18 +171,18 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { } // static -std::string Driver::GetResourcesPath(StringRef BinaryPath, - StringRef CustomResourceDir) { +std::string Driver::GetResourcesPath(StringRef BinaryPath) { // Since the resource directory is embedded in the module hash, it's important // that all places that need it call this function, so that they get the // exact same string ("a/../b/" and "b/" get different hashes, for example). // Dir is bin/ or lib/, depending on where BinaryPath is. - std::string Dir = std::string(llvm::sys::path::parent_path(BinaryPath)); - + StringRef Dir = llvm::sys::path::parent_path(BinaryPath); SmallString<128> P(Dir); - if (CustomResourceDir != "") { - llvm::sys::path::append(P, CustomResourceDir); + + StringRef ConfiguredResourceDir(CLANG_RESOURCE_DIR); + if (!ConfiguredResourceDir.empty()) { + llvm::sys::path::append(P, ConfiguredResourceDir); } else { // On Windows, libclang.dll is in bin/. // On non-Windows, libclang.so/.dylib is in lib/. @@ -239,7 +239,7 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, #endif // Compute the path to the resource directory. 
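With the second parameter gone, the configure-time CLANG_RESOURCE_DIR override is applied inside GetResourcesPath itself, so every caller collapses to the one-argument form; the Driver constructor and CompilerInvocation hunks further down do exactly this. A sketch of a call site (the executable-path variable is a placeholder):

// Resolve the resource directory relative to the running binary; any
// CLANG_RESOURCE_DIR override is now handled internally.
std::string ResourceDirPath =
    clang::driver::Driver::GetResourcesPath(ClangExecutablePath);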
- ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); + ResourceDir = GetResourcesPath(ClangExecutable); } void Driver::setDriverMode(StringRef Value) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9f1d57f43b656..df86941950e46 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7972,8 +7972,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_keep_persistent_storage_variables); Args.addOptInFlag(CmdArgs, options::OPT_fcomplete_member_pointers, options::OPT_fno_complete_member_pointers); - Args.addOptOutFlag(CmdArgs, options::OPT_fcxx_static_destructors, - options::OPT_fno_cxx_static_destructors); + if (Arg *A = Args.getLastArg(options::OPT_cxx_static_destructors_EQ)) + A->render(Args, CmdArgs); addMachineOutlinerArgs(D, Args, CmdArgs, Triple, /*IsLTO=*/false); diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp index 6b85c7db90349..d77bb1d424f7c 100644 --- a/clang/lib/ExtractAPI/DeclarationFragments.cpp +++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp @@ -1327,14 +1327,12 @@ DeclarationFragmentsBuilder::getFragmentsForFunctionTemplateSpecialization( DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForMacro(StringRef Name, - const MacroDirective *MD) { + const MacroInfo *MI) { DeclarationFragments Fragments; Fragments.append("#define", DeclarationFragments::FragmentKind::Keyword) .appendSpace(); Fragments.append(Name, DeclarationFragments::FragmentKind::Identifier); - auto *MI = MD->getMacroInfo(); - if (MI->isFunctionLike()) { Fragments.append("(", DeclarationFragments::FragmentKind::Text); unsigned numParameters = MI->getNumParams(); diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index d6335854cbf26..0adc23280fd6c 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -286,78 +286,59 @@ class MacroCallback : public PPCallbacks { MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP) : SM(SM), API(API), PP(PP) {} - void MacroDefined(const Token &MacroNameToken, - const MacroDirective *MD) override { - auto *MacroInfo = MD->getMacroInfo(); + void EndOfMainFile() override { + for (const auto &M : PP.macros()) { + auto *II = M.getFirst(); + auto MD = PP.getMacroDefinition(II); + auto *MI = MD.getMacroInfo(); - if (MacroInfo->isBuiltinMacro()) - return; + if (!MI) + continue; - auto SourceLoc = MacroNameToken.getLocation(); - if (SM.isWrittenInBuiltinFile(SourceLoc) || - SM.isWrittenInCommandLineFile(SourceLoc)) - return; + // Ignore header guard macros + if (MI->isUsedForHeaderGuard()) + continue; - PendingMacros.emplace_back(MacroNameToken, MD); - } + // Ignore builtin macros and ones defined via the command line. + if (MI->isBuiltinMacro()) + continue; - // If a macro gets undefined at some point during preprocessing of the inputs - // it means that it isn't an exposed API and we should therefore not add a - // macro definition for it. - void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD, - const MacroDirective *Undef) override { - // If this macro wasn't previously defined we don't need to do anything - // here. 
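Rather than recording MacroDefined/MacroUndefined events and reconciling them later, the ExtractAPI callback now asks the Preprocessor for whatever is still defined once the main file ends. A condensed sketch of that walk (module-ownership and location filtering elided):

// EndOfMainFile(): enumerate surviving macro definitions directly.
for (const auto &M : PP.macros()) {
  const IdentifierInfo *II = M.getFirst();
  MacroDefinition MD = PP.getMacroDefinition(II);
  const MacroInfo *MI = MD.getMacroInfo();
  if (!MI || MI->isUsedForHeaderGuard() || MI->isBuiltinMacro())
    continue; // undefined again, a header guard, or a builtin
  SourceLocation DefLoc = MI->getDefinitionLoc();
  // ... skip builtin/command-line locations, then record II->getName() and MI.
}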
- if (!Undef) - return; - - llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) { - return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP, - /*Syntactically*/ false); - }); - } + auto DefLoc = MI->getDefinitionLoc(); - void EndOfMainFile() override { - for (auto &PM : PendingMacros) { - // `isUsedForHeaderGuard` is only set when the preprocessor leaves the - // file so check for it here. - if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) + if (SM.isWrittenInBuiltinFile(DefLoc) || + SM.isWrittenInCommandLineFile(DefLoc)) continue; - if (!shouldMacroBeIncluded(PM)) + auto AssociatedModuleMacros = MD.getModuleMacros(); + StringRef OwningModuleName; + if (!AssociatedModuleMacros.empty()) + OwningModuleName = AssociatedModuleMacros.back() + ->getOwningModule() + ->getTopLevelModuleName(); + + if (!shouldMacroBeIncluded(DefLoc, OwningModuleName)) continue; - StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); - PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); + StringRef Name = II->getName(); + PresumedLoc Loc = SM.getPresumedLoc(DefLoc); SmallString<128> USR; - index::generateUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM, - USR); - + index::generateUSRForMacro(Name, DefLoc, SM, USR); API.createRecord( USR, Name, SymbolReference(), Loc, - DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), + DeclarationFragmentsBuilder::getFragmentsForMacro(Name, MI), DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), - SM.isInSystemHeader(PM.MacroNameToken.getLocation())); + SM.isInSystemHeader(DefLoc)); } - - PendingMacros.clear(); } -protected: - struct PendingMacro { - Token MacroNameToken; - const MacroDirective *MD; - - PendingMacro(const Token &MacroNameToken, const MacroDirective *MD) - : MacroNameToken(MacroNameToken), MD(MD) {} - }; - - virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; } + virtual bool shouldMacroBeIncluded(const SourceLocation &MacroLoc, + StringRef ModuleName) { + return true; + } const SourceManager &SM; APISet &API; Preprocessor &PP; - llvm::SmallVector PendingMacros; }; class APIMacroCallback : public MacroCallback { @@ -366,9 +347,10 @@ class APIMacroCallback : public MacroCallback { LocationFileChecker &LCF) : MacroCallback(SM, API, PP), LCF(LCF) {} - bool shouldMacroBeIncluded(const PendingMacro &PM) override { + bool shouldMacroBeIncluded(const SourceLocation &MacroLoc, + StringRef ModuleName) override { // Do not include macros from external files - return LCF(PM.MacroNameToken.getLocation()); + return LCF(MacroLoc) || API.ProductName == ModuleName; } private: diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 1bce9c59b1979..030509d378759 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -928,8 +928,8 @@ bool SymbolGraphSerializer::traverseObjCCategoryRecord( return true; auto *CurrentModule = ModuleForCurrentSymbol; - if (Record->isExtendingExternalModule()) - ModuleForCurrentSymbol = &ExtendedModules[Record->Interface.Source]; + if (auto ModuleExtendedByRecord = Record->getExtendedExternalModule()) + ModuleForCurrentSymbol = &ExtendedModules[*ModuleExtendedByRecord]; if (!walkUpFromObjCCategoryRecord(Record)) return false; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0bb4175dd021e..32628c5e84332 100644 --- 
a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3130,7 +3130,7 @@ std::string CompilerInvocation::GetResourcesPath(const char *Argv0, void *MainAddr) { std::string ClangExecutable = llvm::sys::fs::getMainExecutable(Argv0, MainAddr); - return Driver::GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); + return Driver::GetResourcesPath(ClangExecutable); } static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ea57316ad8014..95f53dfefbcc5 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -17463,11 +17463,22 @@ static void RemoveNestedImmediateInvocation( ExprResult TransformInitializer(Expr *Init, bool NotCopyInit) { if (!Init) return Init; + + // We cannot use IgnoreImpCasts because we need to preserve + // full expressions. + while (true) { + if (auto *ICE = dyn_cast(Init)) + Init = ICE->getSubExpr(); + else if (auto *ICE = dyn_cast(Init)) + Init = ICE->getSubExpr(); + else + break; + } /// ConstantExpr are the first layer of implicit node to be removed so if /// Init isn't a ConstantExpr, no ConstantExpr will be skipped. - if (auto *CE = dyn_cast(Init)) - if (CE->isImmediateInvocation()) - RemoveImmediateInvocation(CE); + if (auto *CE = dyn_cast(Init); + CE && CE->isImmediateInvocation()) + RemoveImmediateInvocation(CE); return Base::TransformInitializer(Init, NotCopyInit); } ExprResult TransformDeclRefExpr(DeclRefExpr *E) { diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 746c67ff1e979..d8719ab26cc83 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -39,6 +39,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaLambda.h" #include "clang/Sema/SemaObjC.h" @@ -6248,6 +6249,23 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, const TypeSourceI TSTToBeDeduced->getTemplateName().getAsTemplateDecl(), RhsT, Info) == TemplateDeductionResult::Success; } + case BTT_IsScalarizedLayoutCompatible: { + if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType() && + Self.RequireCompleteType(Lhs->getTypeLoc().getBeginLoc(), LhsT, + diag::err_incomplete_type)) + return true; + if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType() && + Self.RequireCompleteType(Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type)) + return true; + + DiagnoseVLAInCXXTypeTrait( + Self, Lhs, tok::kw___builtin_hlsl_is_scalarized_layout_compatible); + DiagnoseVLAInCXXTypeTrait( + Self, Rhs, tok::kw___builtin_hlsl_is_scalarized_layout_compatible); + + return Self.HLSL().IsScalarizedLayoutCompatible(LhsT, RhsT); + } default: llvm_unreachable("not a BTT"); } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 17cb47f80590d..714e8f5cfa992 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1524,3 +1524,85 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { } return false; } + +static void BuildFlattenedTypeList(QualType BaseTy, + llvm::SmallVectorImpl &List) { + llvm::SmallVector WorkList; + WorkList.push_back(BaseTy); + while (!WorkList.empty()) { + QualType T = WorkList.pop_back_val(); + T = T.getCanonicalType().getUnqualifiedType(); + assert(!isa(T) && "Matrix types not yet supported in HLSL"); + if (const auto *AT = dyn_cast(T)) { + 
llvm::SmallVector ElementFields; + // Generally I've avoided recursion in this algorithm, but arrays of + // structs could be time-consuming to flatten and churn through on the + // work list. Hopefully nesting arrays of structs containing arrays + // of structs too many levels deep is unlikely. + BuildFlattenedTypeList(AT->getElementType(), ElementFields); + // Repeat the element's field list n times. + for (uint64_t Ct = 0; Ct < AT->getZExtSize(); ++Ct) + List.insert(List.end(), ElementFields.begin(), ElementFields.end()); + continue; + } + // Vectors can only have element types that are builtin types, so this can + // add directly to the list instead of to the WorkList. + if (const auto *VT = dyn_cast(T)) { + List.insert(List.end(), VT->getNumElements(), VT->getElementType()); + continue; + } + if (const auto *RT = dyn_cast(T)) { + const RecordDecl *RD = RT->getDecl(); + if (RD->isUnion()) { + List.push_back(T); + continue; + } + const CXXRecordDecl *CXXD = dyn_cast(RD); + + llvm::SmallVector FieldTypes; + if (CXXD && CXXD->isStandardLayout()) + RD = CXXD->getStandardLayoutBaseWithFields(); + + for (const auto *FD : RD->fields()) + FieldTypes.push_back(FD->getType()); + // Reverse the newly added sub-range. + std::reverse(FieldTypes.begin(), FieldTypes.end()); + WorkList.insert(WorkList.end(), FieldTypes.begin(), FieldTypes.end()); + + // If this wasn't a standard layout type we may also have some base + // classes to deal with. + if (CXXD && !CXXD->isStandardLayout()) { + FieldTypes.clear(); + for (const auto &Base : CXXD->bases()) + FieldTypes.push_back(Base.getType()); + std::reverse(FieldTypes.begin(), FieldTypes.end()); + WorkList.insert(WorkList.end(), FieldTypes.begin(), FieldTypes.end()); + } + continue; + } + List.push_back(T); + } +} + +bool SemaHLSL::IsScalarizedLayoutCompatible(QualType T1, QualType T2) const { + if (T1.isNull() || T2.isNull()) + return false; + + T1 = T1.getCanonicalType().getUnqualifiedType(); + T2 = T2.getCanonicalType().getUnqualifiedType(); + + // If both types are the same canonical type, they're obviously compatible. + if (SemaRef.getASTContext().hasSameType(T1, T2)) + return true; + + llvm::SmallVector T1Types; + BuildFlattenedTypeList(T1, T1Types); + llvm::SmallVector T2Types; + BuildFlattenedTypeList(T2, T2Types); + + // Check the flattened type list + return llvm::equal(T1Types, T2Types, + [this](QualType LHS, QualType RHS) -> bool { + return SemaRef.IsLayoutCompatible(LHS, RHS); + }); +} diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index fd90f83f3976c..786daea5bab31 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -20,6 +20,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/PrettyDeclStackTrace.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeVisitor.h" @@ -88,12 +89,19 @@ struct Response { // than lambda classes. 
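Because the comparison is over the flattened scalar lists, two aggregates are considered compatible whenever their leaf element types agree in order, regardless of nesting. A hedged source-level illustration (type names invented; in an HLSL translation unit the answer would be queried through the __builtin_hlsl_is_scalarized_layout_compatible trait wired up above):

// Both of these flatten to {float, float, float}:
struct Pair  { float A; float B; };
struct Outer { Pair P; float C; };
struct Flat  { float X; float Y; float Z; };
// __builtin_hlsl_is_scalarized_layout_compatible(Outer, Flat) -> true
// A type flattening to {float, int, float} would compare false, and unions
// are kept whole rather than flattened.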
const FunctionDecl * getPrimaryTemplateOfGenericLambda(const FunctionDecl *LambdaCallOperator) { + if (!isLambdaCallOperator(LambdaCallOperator)) + return LambdaCallOperator; while (true) { if (auto *FTD = dyn_cast_if_present( LambdaCallOperator->getDescribedTemplate()); FTD && FTD->getInstantiatedFromMemberTemplate()) { LambdaCallOperator = FTD->getInstantiatedFromMemberTemplate()->getTemplatedDecl(); + } else if (LambdaCallOperator->getPrimaryTemplate()) { + // Cases where the lambda operator is instantiated in + // TemplateDeclInstantiator::VisitCXXMethodDecl. + LambdaCallOperator = + LambdaCallOperator->getPrimaryTemplate()->getTemplatedDecl(); } else if (auto *Prev = cast(LambdaCallOperator) ->getInstantiatedFromMemberFunction()) LambdaCallOperator = Prev; @@ -139,22 +147,28 @@ getEnclosingTypeAliasTemplateDecl(Sema &SemaRef) { // Check if we are currently inside of a lambda expression that is // surrounded by a using alias declaration. e.g. // template using type = decltype([](auto) { ^ }()); -// By checking if: -// 1. The lambda expression and the using alias declaration share the -// same declaration context. -// 2. They have the same template depth. // We have to do so since a TypeAliasTemplateDecl (or a TypeAliasDecl) is never // a DeclContext, nor does it have an associated specialization Decl from which // we could collect these template arguments. bool isLambdaEnclosedByTypeAliasDecl( - const FunctionDecl *PrimaryLambdaCallOperator, + const FunctionDecl *LambdaCallOperator, const TypeAliasTemplateDecl *PrimaryTypeAliasDecl) { - return cast(PrimaryLambdaCallOperator->getDeclContext()) - ->getTemplateDepth() == - PrimaryTypeAliasDecl->getTemplateDepth() && - getLambdaAwareParentOfDeclContext( - const_cast(PrimaryLambdaCallOperator)) == - PrimaryTypeAliasDecl->getDeclContext(); + struct Visitor : RecursiveASTVisitor { + Visitor(const FunctionDecl *CallOperator) : CallOperator(CallOperator) {} + bool VisitLambdaExpr(const LambdaExpr *LE) { + // Return true to bail out of the traversal, implying the Decl contains + // the lambda. + return getPrimaryTemplateOfGenericLambda(LE->getCallOperator()) != + CallOperator; + } + const FunctionDecl *CallOperator; + }; + + QualType Underlying = + PrimaryTypeAliasDecl->getTemplatedDecl()->getUnderlyingType(); + + return !Visitor(getPrimaryTemplateOfGenericLambda(LambdaCallOperator)) + .TraverseType(Underlying); } // Add template arguments from a variable template instantiation. @@ -293,23 +307,8 @@ Response HandleFunction(Sema &SemaRef, const FunctionDecl *Function, // If this function is a generic lambda specialization, we are done. if (!ForConstraintInstantiation && - isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function)) { - // TypeAliasTemplateDecls should be taken into account, e.g. - // when we're deducing the return type of a lambda. 
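The new check walks the alias template's underlying type with a RecursiveASTVisitor and bails out when it reaches this lambda's primary call operator, instead of comparing template depths and decl contexts. The shape of code it has to recognize, mirroring the comment above (names illustrative):

template <class T> constexpr int Value = sizeof(T);

// A generic lambda whose closure type is spelled inside the alias template's
// underlying type; its call operator must pick up Alias's template arguments.
template <class T>
using Alias = decltype([](auto X) { return Value<decltype(X)>; }(T{}));

using U = Alias<int>; // instantiates the call operator with X = int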
- // - // template int Value = 0; - // template - // using T = decltype([]() { return Value; }()); - // - if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef)) { - if (isLambdaEnclosedByTypeAliasDecl( - /*PrimaryLambdaCallOperator=*/getPrimaryTemplateOfGenericLambda( - Function), - /*PrimaryTypeAliasDecl=*/TypeAlias.PrimaryTypeAliasDecl)) - return Response::UseNextDecl(Function); - } + isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function)) return Response::Done(); - } } else if (Function->getDescribedFunctionTemplate()) { assert( @@ -421,10 +420,9 @@ Response HandleRecordDecl(Sema &SemaRef, const CXXRecordDecl *Rec, // Retrieve the template arguments for a using alias declaration. // This is necessary for constraint checking, since we always keep // constraints relative to the primary template. - if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef)) { - const FunctionDecl *PrimaryLambdaCallOperator = - getPrimaryTemplateOfGenericLambda(Rec->getLambdaCallOperator()); - if (isLambdaEnclosedByTypeAliasDecl(PrimaryLambdaCallOperator, + if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef); + ForConstraintInstantiation && TypeAlias) { + if (isLambdaEnclosedByTypeAliasDecl(Rec->getLambdaCallOperator(), TypeAlias.PrimaryTypeAliasDecl)) { Result.addOuterTemplateArguments(TypeAlias.Template, TypeAlias.AssociatedTemplateArguments, @@ -1647,12 +1645,17 @@ namespace { CXXRecordDecl::LambdaDependencyKind ComputeLambdaDependency(LambdaScopeInfo *LSI) { - auto &CCS = SemaRef.CodeSynthesisContexts.back(); - if (CCS.Kind == - Sema::CodeSynthesisContext::TypeAliasTemplateInstantiation) { - unsigned TypeAliasDeclDepth = CCS.Entity->getTemplateDepth(); + if (auto TypeAlias = + TemplateInstArgsHelpers::getEnclosingTypeAliasTemplateDecl( + getSema()); + TypeAlias && TemplateInstArgsHelpers::isLambdaEnclosedByTypeAliasDecl( + LSI->CallOperator, TypeAlias.PrimaryTypeAliasDecl)) { + unsigned TypeAliasDeclDepth = TypeAlias.Template->getTemplateDepth(); if (TypeAliasDeclDepth >= TemplateArgs.getNumSubstitutedLevels()) return CXXRecordDecl::LambdaDependencyKind::LDK_AlwaysDependent; + for (const TemplateArgument &TA : TypeAlias.AssociatedTemplateArguments) + if (TA.isDependent()) + return CXXRecordDecl::LambdaDependencyKind::LDK_AlwaysDependent; } return inherited::ComputeLambdaDependency(LSI); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 62287c2d26375..b3854cd8f8222 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -6669,9 +6669,15 @@ QualType TreeTransform::TransformPackIndexingType(TypeLocBuilder &TLB, PackIndexingTypeLoc TL) { // Transform the index - ExprResult IndexExpr = getDerived().TransformExpr(TL.getIndexExpr()); - if (IndexExpr.isInvalid()) - return QualType(); + ExprResult IndexExpr; + { + EnterExpressionEvaluationContext ConstantContext( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + IndexExpr = getDerived().TransformExpr(TL.getIndexExpr()); + if (IndexExpr.isInvalid()) + return QualType(); + } QualType Pattern = TL.getPattern(); const PackIndexingType *PIT = TL.getTypePtr(); @@ -15299,9 +15305,14 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return E; // Transform the index - ExprResult IndexExpr = getDerived().TransformExpr(E->getIndexExpr()); - if (IndexExpr.isInvalid()) - return ExprError(); + ExprResult IndexExpr; + { + EnterExpressionEvaluationContext ConstantContext( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + 
IndexExpr = getDerived().TransformExpr(E->getIndexExpr()); + if (IndexExpr.isInvalid()) + return ExprError(); + } SmallVector ExpandedExprs; if (!E->expandsToEmptyPack() && E->getExpressions().empty()) { diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index 60934e51febe8..04472bb3895a7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -692,6 +692,14 @@ void NullabilityChecker::checkPreStmt(const ReturnStmt *S, NullConstraint Nullness = getNullConstraint(*RetSVal, State); Nullability RequiredNullability = getNullabilityAnnotation(RequiredRetType); + if (const auto *FunDecl = C.getLocationContext()->getDecl(); + FunDecl && FunDecl->getAttr() && + (RequiredNullability == Nullability::Unspecified || + RequiredNullability == Nullability::Nullable)) { + // If a function is marked with the returns_nonnull attribute, + // the return value must be non-null. + RequiredNullability = Nullability::Nonnull; + } // If the returned value is null but the type of the expression // generating it is nonnull then we will suppress the diagnostic. @@ -705,7 +713,7 @@ void NullabilityChecker::checkPreStmt(const ReturnStmt *S, Nullness == NullConstraint::IsNull); if (ChecksEnabled[CK_NullReturnedFromNonnull] && NullReturnedFromNonNull && RetExprTypeLevelNullability != Nullability::Nonnull && - !InSuppressedMethodFamily && C.getLocationContext()->inTopFrame()) { + !InSuppressedMethodFamily) { static CheckerProgramPointTag Tag(this, "NullReturnedFromNonnull"); ExplodedNode *N = C.generateErrorNode(State, &Tag); if (!N) diff --git a/clang/test/AST/ByteCode/new-delete.cpp b/clang/test/AST/ByteCode/new-delete.cpp index a7be4102fd0a0..d733e3182fd59 100644 --- a/clang/test/AST/ByteCode/new-delete.cpp +++ b/clang/test/AST/ByteCode/new-delete.cpp @@ -404,7 +404,7 @@ constexpr typename std::remove_reference::type&& move(T &&t) noexcept { namespace cxx2a { struct A { - int* p = new int(42); // both-note 7{{heap allocation performed here}} + int* p = new int(42); // both-note 3{{heap allocation performed here}} consteval int ret_i() const { return p ? 
*p : 0; } consteval A ret_a() const { return A{}; } constexpr ~A() { delete p; } @@ -433,9 +433,7 @@ void test() { { A k = to_lvalue_ref(A()); } // both-error {{'cxx2a::to_lvalue_ref' is not a constant expression}} \ // both-note {{reference to temporary is not a constant expression}} \ // both-note {{temporary created here}} - { A k = to_lvalue_ref(A().ret_a()); } // both-error {{'cxx2a::A::ret_a' is not a constant expression}} \ - // both-note {{heap-allocated object is not a constant expression}} \ - // both-error {{'cxx2a::to_lvalue_ref' is not a constant expression}} \ + { A k = to_lvalue_ref(A().ret_a()); } // both-error {{'cxx2a::to_lvalue_ref' is not a constant expression}} \ // both-note {{reference to temporary is not a constant expression}} \ // both-note {{temporary created here}} { int k = A().ret_a().ret_i(); } // both-error {{'cxx2a::A::ret_a' is not a constant expression}} \ @@ -445,19 +443,15 @@ void test() { { int k = const_a_ref(a); } { int k = rvalue_ref(A()); } { int k = rvalue_ref(std::move(a)); } - { int k = const_a_ref(A().ret_a()); } // both-error {{'cxx2a::A::ret_a' is not a constant expression}} \ - // both-note {{is not a constant expression}} - { int k = const_a_ref(to_lvalue_ref(A().ret_a())); } // both-error {{'cxx2a::A::ret_a' is not a constant expression}} \ - // both-note {{is not a constant expression}} + { int k = const_a_ref(A().ret_a()); } + { int k = const_a_ref(to_lvalue_ref(A().ret_a())); } { int k = const_a_ref(to_lvalue_ref(std::move(a))); } { int k = by_value_a(A().ret_a()); } { int k = by_value_a(to_lvalue_ref(static_cast(a))); } { int k = (A().ret_a(), A().ret_i()); } // both-error {{'cxx2a::A::ret_a' is not a constant expression}} \ // both-note {{is not a constant expression}} \ // both-warning {{left operand of comma operator has no effect}} - { int k = (const_a_ref(A().ret_a()), A().ret_i()); } // both-error {{'cxx2a::A::ret_a' is not a constant expression}} \ - // both-note {{is not a constant expression}} \ - // both-warning {{left operand of comma operator has no effect}} + { int k = (const_a_ref(A().ret_a()), A().ret_i()); } // both-warning {{left operand of comma operator has no effect}} } } diff --git a/clang/test/Analysis/nullability.c b/clang/test/Analysis/nullability.c index fbc03c864ad83..57ce9ac8aee3d 100644 --- a/clang/test/Analysis/nullability.c +++ b/clang/test/Analysis/nullability.c @@ -1,12 +1,65 @@ -// RUN: %clang_analyze_cc1 -fblocks -analyzer-checker=core,nullability -Wno-deprecated-non-prototype -verify %s +// RUN: %clang_analyze_cc1 -fblocks -analyzer-checker=core,nullability,debug.ExprInspection -verify %s + +void clang_analyzer_warnIfReached(void); void it_takes_two(int a, int b); -void function_pointer_arity_mismatch() { +void function_pointer_arity_mismatch(void) { void(*fptr)() = it_takes_two; fptr(1); // no-crash expected-warning {{Function taking 2 arguments is called with fewer (1)}} + // expected-warning@-1 {{passing arguments to a function without a prototype is deprecated in all versions of C and is not supported in C23}} } -void block_arity_mismatch() { +void block_arity_mismatch(void) { void(^b)() = ^(int a, int b) { }; b(1); // no-crash expected-warning {{Block taking 2 arguments is called with fewer (1)}} + // expected-warning@-1 {{passing arguments to a function without a prototype is deprecated in all versions of C and is not supported in C23}} +} + +int *nonnull_return_annotation_indirect(void) __attribute__((returns_nonnull)); +int *nonnull_return_annotation_indirect(void) { + int *x = 0; + return x; 
// expected-warning {{Null returned from a function that is expected to return a non-null value}} +} + +int *nonnull_return_annotation_direct(void) __attribute__((returns_nonnull)); +int *nonnull_return_annotation_direct(void) { + return 0; // expected-warning {{Null returned from a function that is expected to return a non-null value}} +} // expected-warning@-1 {{null returned from function that requires a non-null return value}} + +int *nonnull_return_annotation_assumed(int* ptr) __attribute__((returns_nonnull)); +int *nonnull_return_annotation_assumed(int* ptr) { + if (ptr) { + return ptr; + } + return ptr; // expected-warning {{Null returned from a function that is expected to return a non-null value}} +} + +int *produce_nonnull_ptr(void) __attribute__((returns_nonnull)); + +__attribute__((returns_nonnull)) +int *cannot_return_null(void) { + int *x = produce_nonnull_ptr(); + if (!x) { + clang_analyzer_warnIfReached(); + // expected-warning@-1 {{REACHABLE}} + // TODO: This warning is a false positive, according to the contract of + // produce_nonnull_ptr, x cannot be null. + } + // Regardless of the potential state split above, x cannot be nullptr + // according to the produce_nonnull_ptr annotation. + return x; + // False positive: expected-warning@-1 {{Null returned from a function that is expected to return a non-null value}} +} + +__attribute__((returns_nonnull)) int *passthrough(int *p) { + return p; // no-warning: we have no evidence that `p` is null, i.e., violating the contract +} + +__attribute__((returns_nonnull)) int *passthrough2(int *p); +int *passthrough2(int *p) { + return p; // expected-warning{{Null returned from a function that is expected to return a non-null value}} +} + +void call_with_null(void) { + passthrough2(0); } diff --git a/clang/test/Analysis/nullability.mm b/clang/test/Analysis/nullability.mm index d69116d03df74..64222d939bdd0 100644 --- a/clang/test/Analysis/nullability.mm +++ b/clang/test/Analysis/nullability.mm @@ -438,7 +438,7 @@ -(Dummy *)callerWithParam:(Dummy * _Nonnull) p1 { int * _Nonnull InlinedReturnNullOverSuppressionCallee(int * _Nonnull p2) { int *result = 0; - return result; // no-warning; but this is an over suppression + return result; // expected-warning{{Null returned from a function that is expected to return a non-null value}} } int *InlinedReturnNullOverSuppressionCaller(int * _Nonnull p1) { diff --git a/clang/test/CodeGen/2005-01-02-ConstantInits.c b/clang/test/CodeGen/2005-01-02-ConstantInits.c index 7772a64331ffb..d90c2ea42da61 100644 --- a/clang/test/CodeGen/2005-01-02-ConstantInits.c +++ b/clang/test/CodeGen/2005-01-02-ConstantInits.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --global-value-regex "@.+" +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --global-value-regex "[A-Za-z].*" // RUN: %clang_cc1 -triple=x86_64-unknown-linux %s -emit-llvm -o - | FileCheck %s // This tests all kinds of hard cases with initializers and @@ -51,7 +51,7 @@ int foo(int i) { return bar(&Arr[49])+bar(&Arr[i]); } // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: store ptr @Arr, ptr [[P]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1 // CHECK-NEXT: store ptr 
[[INCDEC_PTR]], ptr [[P]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[TMP1]] to i64 diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c index a3650beec625f..4c4d0dfce05ea 100644 --- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c @@ -1012,14 +1012,14 @@ test_shuffle() { // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3 // CHECK: sext i32 %[[AND4]] to i64 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16]) @@ -1050,7 +1050,7 @@ test_shuffle() { // CHECK: sext i32 %[[AND4]] to i64 // CHECK-LE: store <2 x i64> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-BE: store <2 x i64> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 -// CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}} +// CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}} // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16 @@ -1067,7 +1067,7 @@ test_shuffle() { // CHECK: sext i32 %[[AND4]] to i64 // CHECK-LE: store <2 x i64> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-BE: store <2 x i64> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 -// CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}} +// CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}} // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16]) void __attribute__((noinline)) diff --git a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c index 95dfd1202f157..4a15fa9f76cee 100644 --- a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c +++ 
b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c @@ -894,16 +894,16 @@ test_shuffle() { // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3 // CHECK: sext i32 %[[AND4]] to i64 -// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3 -// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2 -// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1 -// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} +// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}} // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0 // CHECK: call <2 x i64> @vec_splats(unsigned long long) @@ -923,14 +923,14 @@ test_shuffle() { // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3 // CHECK: sext i32 %[[AND4]] to i64 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2 -// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 +// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64 // CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16]) diff --git 
a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index 3ed8b6f0c7186..f3b1653d3632a 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -118,7 +118,7 @@ void test1(struct annotated *p, int index, int val) { // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 2 // SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0 // SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP2]] @@ -134,7 +134,7 @@ void test1(struct annotated *p, int index, int val) { // NO-SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0 // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP0]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // @@ -142,7 +142,7 @@ void test1(struct annotated *p, int index, int val) { // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITHOUT-ATTR-NEXT: entry: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // @@ -150,7 +150,7 @@ void test1(struct annotated *p, int index, int val) { // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // @@ -207,7 +207,7 @@ size_t test2_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr 
[[ARRAY]], i64 0, i64 [[INDEX]] // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2 // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 4) @@ -231,7 +231,7 @@ size_t test2_bdos(struct annotated *p) { // NO-SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0 // NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]] // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // NO-SANITIZE-WITH-ATTR-NEXT: ret void // @@ -239,7 +239,7 @@ size_t test2_bdos(struct annotated *p) { // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // SANITIZE-WITHOUT-ATTR-NEXT: entry: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // SANITIZE-WITHOUT-ATTR-NEXT: ret void // @@ -247,7 +247,7 @@ size_t test2_bdos(struct annotated *p) { // NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] { // NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12 -// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void // diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c index 39ede01d6e3b8..8a560a47ad1e1 100644 --- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c @@ -33,7 +33,7 @@ char *add_unsigned(char *base, unsigned long offset) { // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } 
%[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c index e93dbcb9f647b..d884993ffb2b3 100644 --- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c @@ -50,7 +50,7 @@ char *var_var(char *base, unsigned long offset) { // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize @@ -83,7 +83,7 @@ char *var_zero(char *base) { // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 0 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 0 // CHECK-SANITIZE-C-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize @@ -111,7 +111,7 @@ char *var_one(char *base) { // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 1 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 1 // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize @@ -140,7 +140,7 @@ char *var_allones(char *base) { // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 -1 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 -1 // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], -1, !nosanitize // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize @@ 
-171,7 +171,7 @@ char *nullptr_var(unsigned long offset) { // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr null, i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr null, i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize @@ -217,17 +217,17 @@ char *nullptr_zero(void) { char *nullptr_one_BAD(void) { // CHECK: define{{.*}} ptr @nullptr_one_BAD() // CHECK-NEXT: [[ENTRY:.*]]: - // CHECK-SANITIZE-NEXT: %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 1) to i64), 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 1) to i64), 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COND:.*]] = and i1 false, %[[CMP]], !nosanitize // CHECK-SANITIZE-CPP-NEXT: %[[COND:.*]] = icmp eq i1 false, %[[CMP]], !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: - // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 1) to i64)) - // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 1) to i64)) + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 1) to i64)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_700]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 1) to i64)) // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr null, i64 1) + // CHECK-NEXT: ret ptr getelementptr inbounds nuw (i8, ptr null, i64 1) static char *const base = (char *)0; static const unsigned long offset = 1; #line 700 @@ -237,17 +237,17 @@ char *nullptr_one_BAD(void) { char *nullptr_allones_BAD(void) { // CHECK: define{{.*}} ptr @nullptr_allones_BAD() // CHECK-NEXT: [[ENTRY:.*]]: - // CHECK-SANITIZE-NEXT: %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 -1) to i64), 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[CMP:.*]] = icmp ne i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) to i64), 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COND:.*]] = and i1 false, %[[CMP]], !nosanitize // CHECK-SANITIZE-CPP-NEXT: %[[COND:.*]] = icmp eq i1 false, %[[CMP]], !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: - // CHECK-SANITIZE-NORECOVER-NEXT: call void 
@__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 -1) to i64)) - // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr null, i64 -1) to i64)) + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) to i64)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_800]], i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) to i64)) // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr null, i64 -1) + // CHECK-NEXT: ret ptr getelementptr inbounds nuw (i8, ptr null, i64 -1) static char *const base = (char *)0; static const unsigned long offset = -1; #line 800 @@ -262,7 +262,7 @@ char *one_var(unsigned long offset) { // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr inttoptr (i64 1 to ptr), i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr inttoptr (i64 1 to ptr), i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize @@ -312,17 +312,17 @@ char *one_one_OK(void) { // CHECK: define{{.*}} ptr @one_one_OK() // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-SANITIZE-NEXT: %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null, !nosanitize - // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1), 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1), 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-CPP-NEXT: %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: - // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1)) - // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr 
inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1)) // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 1) + // CHECK-NEXT: ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) static char *const base = (char *)1; static const unsigned long offset = 1; #line 1100 @@ -333,17 +333,17 @@ char *one_allones_BAD(void) { // CHECK: define{{.*}} ptr @one_allones_BAD() // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-SANITIZE-NEXT: %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null, !nosanitize - // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1), 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1), 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-CPP-NEXT: %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: - // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1)) - // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1)) // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 1 to ptr), i64 -1) + // CHECK-NEXT: ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) static char *const base = (char *)1; static const unsigned long offset = -1; #line 1200 @@ -358,7 +358,7 @@ char *allones_var(unsigned long offset) { // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr inttoptr (i64 -1 to ptr), i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr inttoptr (i64 -1 to ptr), i64 
%[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize @@ -408,17 +408,17 @@ char *allones_one_BAD(void) { // CHECK: define{{.*}} ptr @allones_one_BAD() // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-SANITIZE-NEXT: %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize - // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1), 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1), 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-CPP-NEXT: %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: - // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1)) - // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1)) // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 1) + // CHECK-NEXT: ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) static char *const base = (char *)-1; static const unsigned long offset = 1; #line 1500 @@ -429,17 +429,17 @@ char *allones_allones_OK(void) { // CHECK: define{{.*}} ptr @allones_allones_OK() // CHECK-NEXT: [[ENTRY:.*]]: // CHECK-SANITIZE-NEXT: %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize - // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1), 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1), 0, !nosanitize // CHECK-SANITIZE-C-NEXT: %[[COND:.*]] = and i1 %[[CMP1]], %[[CMP2]], !nosanitize // CHECK-SANITIZE-CPP-NEXT: %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize // 
CHECK-SANITIZE-NEXT: br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: - // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1)) - // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1)) // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) + // CHECK-NEXT: ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) static char *const base = (char *)-1; static const unsigned long offset = -1; #line 1600 @@ -461,7 +461,7 @@ char *void_ptr(void *base, unsigned long offset) { // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize diff --git a/clang/test/CodeGen/catch-pointer-overflow-volatile.c b/clang/test/CodeGen/catch-pointer-overflow-volatile.c index 4b0653a0ae59e..626bbc0db7afb 100644 --- a/clang/test/CodeGen/catch-pointer-overflow-volatile.c +++ b/clang/test/CodeGen/catch-pointer-overflow-volatile.c @@ -23,7 +23,7 @@ char *volatile_ptr(char *volatile base, unsigned long offset) { // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load volatile ptr, ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } 
%[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize diff --git a/clang/test/CodeGen/catch-pointer-overflow.c b/clang/test/CodeGen/catch-pointer-overflow.c index 899af73bd81e0..1f7f1729098c7 100644 --- a/clang/test/CodeGen/catch-pointer-overflow.c +++ b/clang/test/CodeGen/catch-pointer-overflow.c @@ -30,7 +30,7 @@ char *add_unsigned(char *base, unsigned long offset) { // CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize @@ -179,7 +179,7 @@ char *postinc(char *base) { // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i32 1 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i32 1 // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize @@ -241,7 +241,7 @@ char *preinc(char *base) { // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr %[[BASE]], ptr %[[BASE_ADDR]], align 8 // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8 - // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i32 1 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i32 1 // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint ptr %[[BASE_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne ptr %[[BASE_RELOADED]], null, !nosanitize diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c index 714b7e122a706..e3d609a4ba4a2 100644 --- a/clang/test/CodeGen/ext-int.c +++ b/clang/test/CodeGen/ext-int.c @@ -154,7 +154,7 @@ _BitInt(129) *f1(_BitInt(129) *p) { } char *f2(char *p) { - // CHECK64: getelementptr inbounds i8, {{.*}} i64 24 + // CHECK64: getelementptr inbounds nuw i8, {{.*}} i64 24 return p + sizeof(_BitInt(129)); } diff --git a/clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c b/clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c index 7802168de4d75..d25d1e04f15fa 100644 --- a/clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c +++ b/clang/test/CodeGen/hexagon-brev-ld-ptr-incdec.c @@ -6,9 +6,9 @@ // the return value will be the value in A[2] // CHECK: @brev_ptr_inc // CHECK-DAG: 
llvm.hexagon.L2.loadri.pbr -// CHECK-DAG: getelementptr inbounds i8, {{.*}}i32 4 -// CHECK-NOT: getelementptr inbounds i8, {{.*}}i32 8 -// CHECK-NOT: getelementptr inbounds i8, {{.*}}i32 4 +// CHECK-DAG: getelementptr inbounds nuw i8, {{.*}}i32 4 +// CHECK-NOT: getelementptr inbounds nuw i8, {{.*}}i32 8 +// CHECK-NOT: getelementptr inbounds nuw i8, {{.*}}i32 4 int brev_ptr_inc(int A[], int B[]) { int *p0 = &B[0]; int *p1 = &A[0]; diff --git a/clang/test/CodeGen/integer-overflow.c b/clang/test/CodeGen/integer-overflow.c index 461b026d39615..9e8cde8b33b16 100644 --- a/clang/test/CodeGen/integer-overflow.c +++ b/clang/test/CodeGen/integer-overflow.c @@ -60,10 +60,10 @@ void test1(void) { // -fwrapv should turn off inbounds for GEP's, PR9256 extern int* P; ++P; - // DEFAULT: getelementptr inbounds i32, ptr + // DEFAULT: getelementptr inbounds nuw i32, ptr // WRAPV: getelementptr i32, ptr - // TRAPV: getelementptr inbounds i32, ptr - // CATCH_UB_POINTER: getelementptr inbounds i32, ptr + // TRAPV: getelementptr inbounds nuw i32, ptr + // CATCH_UB_POINTER: getelementptr inbounds nuw i32, ptr // NOCATCH_UB_POINTER: getelementptr i32, ptr // PR9350: char pre-increment never overflows. diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c index c3d64fda0b901..459a708d9b2e0 100644 --- a/clang/test/CodeGen/ms-intrinsics.c +++ b/clang/test/CodeGen/ms-intrinsics.c @@ -156,7 +156,7 @@ unsigned char test_BitScanForward(unsigned long *Index, unsigned long Mask) { // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr %Index, {{i64|i32}} 4 +// CHECK: [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds nuw i8, ptr %Index, {{i64|i32}} 4 // CHECK: [[INDEX:%[0-9]+]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %Mask, i1 true) // CHECK: store i32 [[INDEX]], ptr [[IDXGEP]], align 4 // CHECK: br label %[[END_LABEL]] @@ -171,7 +171,7 @@ unsigned char test_BitScanReverse(unsigned long *Index, unsigned long Mask) { // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr %Index, {{i64|i32}} 4 +// CHECK: [[IDXGEP:%[a-z0-9._]+]] = getelementptr inbounds nuw i8, ptr %Index, {{i64|i32}} 4 // CHECK: [[REVINDEX:%[0-9]+]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %Mask, i1 true) // CHECK: [[INDEX:%[0-9]+]] = xor i32 [[REVINDEX]], 31 // CHECK: store i32 [[INDEX]], ptr [[IDXGEP]], align 4 @@ -437,10 +437,10 @@ unsigned char test_InterlockedCompareExchange128( ++ExchangeLow, ++ComparandResult); } // CHECK-64: define{{.*}}i8 @test_InterlockedCompareExchange128(ptr{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, ptr{{[a-z_ ]*}}%ComparandResult){{.*}}{ -// CHECK-64: %incdec.ptr = getelementptr inbounds i8, ptr %Destination, i64 8 +// CHECK-64: %incdec.ptr = getelementptr inbounds nuw i8, ptr %Destination, i64 8 // CHECK-64: %inc = add nsw i64 %ExchangeHigh, 1 // CHECK-64: %inc1 = add nsw i64 %ExchangeLow, 1 -// CHECK-64: %incdec.ptr2 = getelementptr inbounds i8, ptr %ComparandResult, i64 8 +// CHECK-64: %incdec.ptr2 = getelementptr inbounds nuw i8, ptr %ComparandResult, i64 8 // CHECK-64: [[EH:%[0-9]+]] = zext i64 %inc to i128 // CHECK-64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128 // CHECK-64: 
[[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64 @@ -486,7 +486,7 @@ short test_InterlockedIncrement16(short volatile *Addend) { return _InterlockedIncrement16(++Addend); } // CHECK: define{{.*}}i16 @test_InterlockedIncrement16(ptr{{[a-z_ ]*}}%Addend){{.*}}{ -// CHECK: %incdec.ptr = getelementptr inbounds i8, ptr %Addend, {{i64|i32}} 2 +// CHECK: %incdec.ptr = getelementptr inbounds nuw i8, ptr %Addend, {{i64|i32}} 2 // CHECK: [[TMP:%[0-9]+]] = atomicrmw add ptr %incdec.ptr, i16 1 seq_cst, align 2 // CHECK: [[RESULT:%[0-9]+]] = add i16 [[TMP]], 1 // CHECK: ret i16 [[RESULT]] @@ -496,7 +496,7 @@ long test_InterlockedIncrement(long volatile *Addend) { return _InterlockedIncrement(++Addend); } // CHECK: define{{.*}}i32 @test_InterlockedIncrement(ptr{{[a-z_ ]*}}%Addend){{.*}}{ -// CHECK: %incdec.ptr = getelementptr inbounds i8, ptr %Addend, {{i64|i32}} 4 +// CHECK: %incdec.ptr = getelementptr inbounds nuw i8, ptr %Addend, {{i64|i32}} 4 // CHECK: [[TMP:%[0-9]+]] = atomicrmw add ptr %incdec.ptr, i32 1 seq_cst, align 4 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1 // CHECK: ret i32 [[RESULT]] diff --git a/clang/test/CodeGen/ubsan-pointer-overflow.m b/clang/test/CodeGen/ubsan-pointer-overflow.m index 9192598da92fc..4ecdac655669f 100644 --- a/clang/test/CodeGen/ubsan-pointer-overflow.m +++ b/clang/test/CodeGen/ubsan-pointer-overflow.m @@ -5,7 +5,7 @@ void variable_len_array_arith(int n, int k) { int vla[n]; int (*p)[n] = &vla; - // CHECK: getelementptr inbounds i32, ptr {{.*}}, i64 [[INC:%.*]] + // CHECK: getelementptr inbounds nuw i32, ptr {{.*}}, i64 [[INC:%.*]] // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 [[INC]]), !nosanitize // CHECK-NOT: select // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} diff --git a/clang/test/CodeGen/vla.c b/clang/test/CodeGen/vla.c index 33621c5dd7a29..a22ba727df2fe 100644 --- a/clang/test/CodeGen/vla.c +++ b/clang/test/CodeGen/vla.c @@ -120,7 +120,7 @@ int test4(unsigned n, char (*p)[n][n+1][6]) { // CHECK-NEXT: [[T2:%.*]] = udiv i32 [[T1]], 2 // CHECK-NEXT: [[T3:%.*]] = mul nuw i32 [[DIM0]], [[DIM1]] // CHECK-NEXT: [[T4:%.*]] = mul nsw i32 [[T2]], [[T3]] - // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [6 x i8], ptr [[T0]], i32 [[T4]] + // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds nuw [6 x i8], ptr [[T0]], i32 [[T4]] // CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[N]], align 4 // CHECK-NEXT: [[T7:%.*]] = udiv i32 [[T6]], 4 // CHECK-NEXT: [[T8:%.*]] = sub i32 0, [[T7]] diff --git a/clang/test/CodeGenCXX/always_destroy.cpp b/clang/test/CodeGenCXX/always_destroy.cpp index e84c4cf02c52f..ca8a8e0cabacb 100644 --- a/clang/test/CodeGenCXX/always_destroy.cpp +++ b/clang/test/CodeGenCXX/always_destroy.cpp @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 %s -fno-c++-static-destructors -emit-llvm -triple x86_64-apple-macosx10.13.0 -o - | FileCheck %s +// RUN: %clang_cc1 %s -fc++-static-destructors=none -emit-llvm -triple x86_64-apple-macosx10.13.0 -o - | \ +// RUN: FileCheck --check-prefixes=CHECK,NO-DTORS %s +// RUN: %clang_cc1 %s -fc++-static-destructors=thread-local -emit-llvm -triple x86_64-apple-macosx10.13.0 -o - | \ +// RUN: FileCheck --check-prefixes=CHECK,THREAD-LOCAL-DTORS %s struct NonTrivial { ~NonTrivial(); @@ -6,7 +9,8 @@ struct NonTrivial { // CHECK-NOT: __cxa_atexit{{.*}}_ZN10NonTrivialD1Ev NonTrivial nt1; -// CHECK-NOT: _tlv_atexit{{.*}}_ZN10NonTrivialD1Ev +// NO-DTORS-NOT: _tlv_atexit{{.*}}_ZN10NonTrivialD1Ev +// THREAD-LOCAL-DTORS: _tlv_atexit{{.*}}_ZN10NonTrivialD1Ev thread_local NonTrivial nt2; struct NonTrivial2 { diff --git 
a/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp b/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp index e842de6335046..fd9786de3a949 100644 --- a/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp +++ b/clang/test/CodeGenCXX/attr-likelihood-iteration-stmt.cpp @@ -152,16 +152,16 @@ void f_branch_elided() // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[__BEGIN1]]) #[[ATTR3]] // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16:![0-9]+]] +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[__END1]]) #[[ATTR3]] // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4 -// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]] // CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 true) // CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -172,16 +172,16 @@ void f_branch_elided() // CHECK-NEXT: br label [[FOR_END:%.*]] // CHECK: for.body: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR3]] // CHECK-NEXT: br label [[FOR_INC:%.*]] // CHECK: for.inc: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1 +// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK: for.end: // CHECK-NEXT: ret void // @@ -204,16 +204,16 @@ void frl(int (&&e) [4]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[__BEGIN1]]) #[[ATTR3]] // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr 
[[TMP1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[__END1]]) #[[ATTR3]] // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY1]], i64 4 -// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[TMP3]], [[TMP4]] // CHECK-NEXT: [[CMP_EXPVAL:%.*]] = call i1 @llvm.expect.i1(i1 [[CMP]], i1 false) // CHECK-NEXT: br i1 [[CMP_EXPVAL]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -224,16 +224,16 @@ void frl(int (&&e) [4]) // CHECK-NEXT: br label [[FOR_END:%.*]] // CHECK: for.body: // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR3]] -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: store i32 [[TMP6]], ptr [[I]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR3]] // CHECK-NEXT: br label [[FOR_INC:%.*]] // CHECK: for.inc: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA16]] -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 1 +// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8, !tbaa [[TBAA14]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK: for.end: // CHECK-NEXT: ret void // diff --git a/clang/test/CodeGenCXX/attr-no-destroy-d54344.cpp b/clang/test/CodeGenCXX/attr-no-destroy-d54344.cpp index b03791e5135df..053043adb61c1 100644 --- a/clang/test/CodeGenCXX/attr-no-destroy-d54344.cpp +++ b/clang/test/CodeGenCXX/attr-no-destroy-d54344.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -std=c++2a -emit-llvm -O0 -triple x86_64-unknown-linux-gnu -DNOATTR %s -o - | FileCheck %s // RUN: %clang_cc1 -std=c++2a -emit-llvm -O0 -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK-ATTR -// RUN: %clang_cc1 -std=c++2a -emit-llvm -O0 -triple x86_64-unknown-linux-gnu -DNOATTR -fno-c++-static-destructors %s -o - | FileCheck %s --check-prefix=CHECK-FLAG +// RUN: %clang_cc1 -std=c++2a -emit-llvm -O0 -triple x86_64-unknown-linux-gnu -DNOATTR -fc++-static-destructors=none %s -o - | FileCheck %s --check-prefix=CHECK-FLAG +// RUN: %clang_cc1 
-std=c++2a -emit-llvm -O0 -triple x86_64-unknown-linux-gnu -DNOATTR -fc++-static-destructors=thread-local %s -o - | FileCheck %s --check-prefix=CHECK-FLAG // Regression test for D54344. Class with no user-defined destructor // that has an inherited member that has a non-trivial destructor diff --git a/clang/test/CodeGenCXX/for-range.cpp b/clang/test/CodeGenCXX/for-range.cpp index 10d27206d12e4..088a34647c374 100644 --- a/clang/test/CodeGenCXX/for-range.cpp +++ b/clang/test/CodeGenCXX/for-range.cpp @@ -33,7 +33,7 @@ B *end(C&); extern B array[5]; -// CHECK-LABEL: define {{[^@]+}}@_Z9for_arrayv( +// CHECK-LABEL: @_Z9for_arrayv( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1 // CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 @@ -57,7 +57,7 @@ extern B array[5]; // CHECK-NEXT: br label [[FOR_INC:%.*]] // CHECK: for.inc: // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP3]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[TMP3]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8 // CHECK-NEXT: br label [[FOR_COND]] // CHECK: for.end: @@ -70,7 +70,7 @@ void for_array() { } } -// CHECK-LABEL: define {{[^@]+}}@_Z9for_rangev( +// CHECK-LABEL: @_Z9for_rangev( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1 // CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 @@ -103,7 +103,7 @@ void for_array() { // CHECK-NEXT: br label [[FOR_INC:%.*]] // CHECK: for.inc: // CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP5]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[TMP5]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8 // CHECK-NEXT: br label [[FOR_COND]] // CHECK: for.end: @@ -116,7 +116,7 @@ void for_range() { } } -// CHECK-LABEL: define {{[^@]+}}@_Z16for_member_rangev( +// CHECK-LABEL: @_Z16for_member_rangev( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 1 // CHECK-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 @@ -149,7 +149,7 @@ void for_range() { // CHECK-NEXT: br label [[FOR_INC:%.*]] // CHECK: for.inc: // CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__BEGIN1]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds [[STRUCT_B]], ptr [[TMP5]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[TMP5]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN1]], align 8 // CHECK-NEXT: br label [[FOR_COND]] // CHECK: for.end: diff --git a/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp b/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp index f7df110ec0129..bcb2d875dce66 100644 --- a/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp +++ b/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp @@ -16,7 +16,7 @@ void (*d)(){test_transform<0>}; // CHECK-NEXT: [[BODY]]: // CHECK-NEXT: [[CUR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[BODY]] ] // CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i32, ptr [[BEGIN]], i64 [[CUR]] -// CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds [1 x i32], ptr @a, i64 0, i64 [[CUR]] +// CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds nuw [1 x i32], ptr @a, i64 0, i64 [[CUR]] // CHECK-NEXT: [[X:%.*]] = load i32, ptr [[SRC]] // CHECK-NEXT: store i32 [[X]], ptr [[DEST]] // CHECK-NEXT: 
[[NEXT]] = add nuw i64 [[CUR]], 1 diff --git a/clang/test/CodeGenCXX/vla.cpp b/clang/test/CodeGenCXX/vla.cpp index 4cf2b3b445b40..aadf51fce3a44 100644 --- a/clang/test/CodeGenCXX/vla.cpp +++ b/clang/test/CodeGenCXX/vla.cpp @@ -83,7 +83,7 @@ void test2(int b) { //CHECK: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_NUM_ELEMENTS_PRE]] //CHECK-NEXT: [[VLA_NUM_ELEMENTS_POST:%.*]] = udiv i64 [[VLA_SIZEOF]], 4 - //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, ptr {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]] + //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds nuw i32, ptr {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]] //X64-NEXT: store ptr [[VLA_END_PTR]], ptr %__end1 //AMDGCN-NEXT: store ptr [[VLA_END_PTR]], ptr [[END]] for (int d : varr) 0; @@ -116,7 +116,7 @@ void test3(int b, int c) { //CHECK-NEXT: [[VLA_SIZEOF_DIM2:%.*]] = mul nuw i64 4, [[VLA_DIM2_PRE]] //CHECK-NEXT: [[VLA_NUM_ELEMENTS:%.*]] = udiv i64 [[VLA_SIZEOF]], [[VLA_SIZEOF_DIM2]] //CHECK-NEXT: [[VLA_END_INDEX:%.*]] = mul nsw i64 [[VLA_NUM_ELEMENTS]], [[VLA_DIM2_PRE]] - //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, ptr {{%.*}}, i64 [[VLA_END_INDEX]] + //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds nuw i32, ptr {{%.*}}, i64 [[VLA_END_INDEX]] //X64-NEXT: store ptr [[VLA_END_PTR]], ptr %__end //AMDGCN-NEXT: store ptr [[VLA_END_PTR]], ptr [[END]] diff --git a/clang/test/CodeGenHLSL/buffer-array-operator.hlsl b/clang/test/CodeGenHLSL/buffer-array-operator.hlsl index f5556df30871c..02e570ebdcb4f 100644 --- a/clang/test/CodeGenHLSL/buffer-array-operator.hlsl +++ b/clang/test/CodeGenHLSL/buffer-array-operator.hlsl @@ -17,7 +17,7 @@ void fn(int Idx) { // CHECK-NEXT: %h = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0 // CHECK-NEXT: %0 = load ptr, ptr %h, align 4 // CHECK-NEXT: %1 = load i32, ptr %Idx.addr, align 4 -// CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %0, i32 %1 +// CHECK-NEXT: %arrayidx = getelementptr inbounds nuw float, ptr %0, i32 %1 // CHECK-NEXT: ret ptr %arrayidx // Const comes next, and returns the pointer instead of the value. 
@@ -26,5 +26,5 @@ void fn(int Idx) { // CHECK-NEXT: %h = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0 // CHECK-NEXT: %0 = load ptr, ptr %h, align 4 // CHECK-NEXT: %1 = load i32, ptr %Idx.addr, align 4 -// CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %0, i32 %1 +// CHECK-NEXT: %arrayidx = getelementptr inbounds nuw float, ptr %0, i32 %1 // CHECK-NEXT: ret ptr %arrayidx diff --git a/clang/test/CodeGenSYCL/address-space-deduction.cpp b/clang/test/CodeGenSYCL/address-space-deduction.cpp index 96075a47343fe..5910ec3bfc305 100644 --- a/clang/test/CodeGenSYCL/address-space-deduction.cpp +++ b/clang/test/CodeGenSYCL/address-space-deduction.cpp @@ -33,55 +33,55 @@ // CHECK-NEXT: store ptr addrspace(4) [[I_ASCAST]], ptr addrspace(4) [[PPTR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PPTR_ASCAST]], align 8 // CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(4) [[TMP0]], [[I_ASCAST]] -// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[IS_I_PTR_ASCAST]], align 1 +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMP]] to i8 +// CHECK-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[IS_I_PTR_ASCAST]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PPTR_ASCAST]], align 8 // CHECK-NEXT: store i32 66, ptr addrspace(4) [[TMP1]], align 4 // CHECK-NEXT: store i32 23, ptr addrspace(4) [[VAR23_ASCAST]], align 4 // CHECK-NEXT: store ptr addrspace(4) [[VAR23_ASCAST]], ptr addrspace(4) [[CP_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CP_ASCAST]], align 8 -// CHECK-NEXT: store i8 41, ptr addrspace(4) [[TMP3]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CP_ASCAST]], align 8 +// CHECK-NEXT: store i8 41, ptr addrspace(4) [[TMP2]], align 1 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [42 x i32], ptr addrspace(4) [[ARR_ASCAST]], i64 0, i64 0 // CHECK-NEXT: store ptr addrspace(4) [[ARRAYDECAY]], ptr addrspace(4) [[CPP_ASCAST]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CPP_ASCAST]], align 8 -// CHECK-NEXT: store i8 43, ptr addrspace(4) [[TMP5]], align 1 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[CPP_ASCAST]], align 8 +// CHECK-NEXT: store i8 43, ptr addrspace(4) [[TMP3]], align 1 // CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [42 x i32], ptr addrspace(4) [[ARR_ASCAST]], i64 0, i64 0 // CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[ARRAYDECAY1]], i64 10 // CHECK-NEXT: store ptr addrspace(4) [[ADD_PTR]], ptr addrspace(4) [[APTR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8 // CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [42 x i32], ptr addrspace(4) [[ARR_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[ARRAYDECAY2]], i64 168 -// CHECK-NEXT: [[CMP4:%.*]] = icmp ult ptr addrspace(4) [[TMP6]], [[ADD_PTR3]] +// CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[ARRAYDECAY2]], i64 168 +// CHECK-NEXT: [[CMP4:%.*]] = icmp ult ptr addrspace(4) [[TMP4]], [[ADD_PTR3]] // CHECK-NEXT: br i1 [[CMP4]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP7:%.*]] = 
load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8 -// CHECK-NEXT: store i32 44, ptr addrspace(4) [[TMP7]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[APTR_ASCAST]], align 8 +// CHECK-NEXT: store i32 44, ptr addrspace(4) [[TMP5]], align 4 // CHECK-NEXT: br label [[IF_END]] // CHECK: if.end: // CHECK-NEXT: store ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str to ptr addrspace(4)), ptr addrspace(4) [[STR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP8]], i64 0 -// CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX]], align 1 -// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP9]] to i32 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP6]], i64 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX]], align 1 +// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 // CHECK-NEXT: store i32 [[CONV]], ptr addrspace(4) [[I_ASCAST]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4 -// CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4 +// CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], 2 // CHECK-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: -// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8 // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi ptr addrspace(4) [ [[TMP11]], [[COND_TRUE]] ], [ addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi ptr addrspace(4) [ [[TMP9]], [[COND_TRUE]] ], [ addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), [[COND_FALSE]] ] // CHECK-NEXT: store ptr addrspace(4) [[COND]], ptr addrspace(4) [[PHI_STR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4 -// CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], 2 -// CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[CMP6]] to i64 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) [[I_ASCAST]], align 4 +// CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[CMP6]] to i64 // CHECK-NEXT: [[COND7:%.*]] = select i1 [[CMP6]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str.2 to ptr addrspace(4)), ptr addrspace(4) null // CHECK-NEXT: store ptr addrspace(4) [[COND7]], ptr addrspace(4) [[SELECT_NULL_ASCAST]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8 -// CHECK-NEXT: store ptr addrspace(4) [[TMP14]], ptr addrspace(4) [[SELECT_STR_TRIVIAL1_ASCAST]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[STR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(4) [[TMP12]], ptr addrspace(4) [[SELECT_STR_TRIVIAL1_ASCAST]], align 8 // CHECK-NEXT: store ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), ptr addrspace(4) [[SELECT_STR_TRIVIAL2_ASCAST]], align 8 // CHECK-NEXT: ret void // diff --git 
a/clang/test/Driver/cxx-static-destructors.cpp b/clang/test/Driver/cxx-static-destructors.cpp new file mode 100644 index 0000000000000..a12a292a387bd --- /dev/null +++ b/clang/test/Driver/cxx-static-destructors.cpp @@ -0,0 +1,11 @@ +// RUN: %clang -### -c -fc++-static-destructors=all %s 2>&1 | FileCheck --check-prefix ALL %s +// RUN: %clang -### -c -fc++-static-destructors %s 2>&1 | FileCheck --check-prefix ALL %s +// RUN: %clang -### -c -fno-c++-static-destructors -fc++-static-destructors %s 2>&1 | FileCheck --check-prefix ALL %s +// RUN: %clang -### -c -fc++-static-destructors=none %s 2>&1 | FileCheck --check-prefix NONE %s +// RUN: %clang -### -c -fno-c++-static-destructors %s 2>&1 | FileCheck --check-prefix NONE %s +// RUN: %clang -### -c -fc++-static-destructors -fno-c++-static-destructors %s 2>&1 | FileCheck --check-prefix NONE %s +// RUN: %clang -### -c -fc++-static-destructors=thread-local %s 2>&1 | FileCheck --check-prefix THREAD-LOCAL %s + +// ALL: -fc++-static-destructors=all +// NONE: -fc++-static-destructors=none +// THREAD-LOCAL: -fc++-static-destructors=thread-local diff --git a/clang/test/Driver/offload-packager.c b/clang/test/Driver/offload-packager.c index 9adc202322521..fb5f1006cf9e2 100644 --- a/clang/test/Driver/offload-packager.c +++ b/clang/test/Driver/offload-packager.c @@ -3,63 +3,64 @@ // REQUIRES: amdgpu-registered-target // UNSUPPORTED: system-windows -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t/elf.o // Check that we can extract files from the packaged binary. -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_80 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90c -// RUN: clang-offload-packager %t.out \ -// RUN: --image=file=%t-sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t-gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 -// RUN: diff %t-sm_70.o %t.elf.o -// RUN: diff %t-gfx908.o %t.elf.o +// RUN: clang-offload-packager -o %t/package.out \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_80 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90c +// RUN: clang-offload-packager %t/package.out \ +// RUN: --image=file=%t/sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t/gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 +// RUN: diff %t/sm_70.o %t/elf.o +// RUN: diff %t/gfx908.o %t/elf.o // Check that we generate a new name if one is not given -// RUN: clang-offload-packager -o %t \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_80 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ -// RUN: 
--image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \ -// RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90c -// RUN: cd $(dirname "%t") && clang-offload-packager %t --image=kind=openmp -// RUN: diff *-nvptx64-nvidia-cuda-sm_70.0.o %t.elf.o; rm *-nvptx64-nvidia-cuda-sm_70.0.o -// RUN: diff *-nvptx64-nvidia-cuda-sm_80.1.o %t.elf.o; rm *-nvptx64-nvidia-cuda-sm_80.1.o -// RUN: diff *-amdgcn-amd-amdhsa-gfx908.2.o %t.elf.o; rm *-amdgcn-amd-amdhsa-gfx908.2.o -// RUN: diff *-amdgcn-amd-amdhsa-gfx90a.3.o %t.elf.o; rm *-amdgcn-amd-amdhsa-gfx90a.3.o -// RUN: not diff *-amdgcn-amd-amdhsa-gfx90c.4.o %t.elf.o +// RUN: clang-offload-packager -o %t/package \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_80 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \ +// RUN: --image=file=%t/elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90c +// RUN: clang-offload-packager %t/package --image=kind=openmp +// RUN: diff *-nvptx64-nvidia-cuda-sm_70.0.o %t/elf.o; rm *-nvptx64-nvidia-cuda-sm_70.0.o +// RUN: diff *-nvptx64-nvidia-cuda-sm_80.1.o %t/elf.o; rm *-nvptx64-nvidia-cuda-sm_80.1.o +// RUN: diff *-amdgcn-amd-amdhsa-gfx908.2.o %t/elf.o; rm *-amdgcn-amd-amdhsa-gfx908.2.o +// RUN: diff *-amdgcn-amd-amdhsa-gfx90a.3.o %t/elf.o; rm *-amdgcn-amd-amdhsa-gfx90a.3.o +// RUN: not diff *-amdgcn-amd-amdhsa-gfx90c.4.o %t/elf.o // Check that we can extract from an ELF object file -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-offload-packager %t.out \ -// RUN: --image=file=%t-sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t-gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 -// RUN: diff %t-sm_70.o %t.elf.o -// RUN: diff %t-gfx908.o %t.elf.o +// RUN: clang-offload-packager -o %t/package.out \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t/package.o -fembed-offload-object=%t/package.out +// RUN: clang-offload-packager %t/package.out \ +// RUN: --image=file=%t/sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t/gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 +// RUN: diff %t/sm_70.o %t/elf.o +// RUN: diff %t/gfx908.o %t/elf.o // Check that we can extract from a bitcode file -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -o %t.bc -fembed-offload-object=%t.out -// RUN: clang-offload-packager %t.out \ -// RUN: --image=file=%t-sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t-gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 -// RUN: diff %t-sm_70.o %t.elf.o -// RUN: diff %t-gfx908.o %t.elf.o +// RUN: clang-offload-packager -o %t/package.out \ +// RUN: 
--image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -o %t/package.bc -fembed-offload-object=%t/package.out +// RUN: clang-offload-packager %t/package.out \ +// RUN: --image=file=%t/sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ +// RUN: --image=file=%t/gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 +// RUN: diff %t/sm_70.o %t/elf.o +// RUN: diff %t/gfx908.o %t/elf.o // Check that we can extract from an archive file to an archive file. -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: llvm-ar rcs %t.a %t.o -// RUN: clang-offload-packager %t.a --archive --image=file=%t-gfx908.a,arch=gfx908 -// RUN: llvm-ar t %t-gfx908.a 2>&1 | FileCheck %s +// RUN: clang-offload-packager -o %t/package.out \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t/package.o -fembed-offload-object=%t/package.out +// RUN: llvm-ar rcs %t/package.a %t/package.o +// RUN: clang-offload-packager %t/package.a --archive --image=file=%t/gfx908.a,arch=gfx908 +// RUN: llvm-ar t %t/gfx908.a 2>&1 | FileCheck %s // CHECK: {{.*}}.o diff --git a/clang/test/ExtractAPI/bool.c b/clang/test/ExtractAPI/bool.c index efab6dfeef03b..7a4d34acb0d26 100644 --- a/clang/test/ExtractAPI/bool.c +++ b/clang/test/ExtractAPI/bool.c @@ -2,8 +2,8 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --pretty-sgf -target arm64-apple-macosx \ -// RUN: %t/input.h -o %t/output.json +// RUN: %clang_cc1 -extract-api --product-name=BoolTest --pretty-sgf -triple arm64-apple-macosx \ +// RUN: %t/input.h -o %t/output.json // Generator version is not consistent across test runs, normalize it. // RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ @@ -15,7 +15,7 @@ bool Foo; bool IsFoo(bool Bar); -/// expected-no-diagnostics +// expected-no-diagnostics //--- reference.output.json.in { @@ -28,7 +28,7 @@ bool IsFoo(bool Bar); "generator": "?" 
}, "module": { - "name": "", + "name": "BoolTest", "platform": { "architecture": "arm64", "operatingSystem": { diff --git a/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c b/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c index e668f69bc7e05..651e0df2cd93a 100644 --- a/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c +++ b/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c @@ -27,9 +27,6 @@ #ifndef TEST_H #define TEST_H -#define testmarcro1 32 -#define testmacro2 42 - int testfunc (int param1, int param2); void testfunc2 (); #endif /* TEST_H */ @@ -185,7 +182,7 @@ int main () "location": { "position": { "character": 4, - "line": 6 + "line": 3 }, "uri": "file://INPUT_DIR/test.h" }, @@ -249,7 +246,7 @@ int main () "location": { "position": { "character": 5, - "line": 7 + "line": 4 }, "uri": "file://INPUT_DIR/test.h" }, @@ -335,106 +332,6 @@ int main () "pathComponents": [ "main" ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "testmarcro1" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:test.h@39@macro@testmarcro1" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 3 - }, - "uri": "file://INPUT_DIR/test.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "testmarcro1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "testmarcro1" - } - ], - "title": "testmarcro1" - }, - "pathComponents": [ - "testmarcro1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "testmacro2" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:test.h@62@macro@testmacro2" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 4 - }, - "uri": "file://INPUT_DIR/test.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "testmacro2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "testmacro2" - } - ], - "title": "testmacro2" - }, - "pathComponents": [ - "testmacro2" - ] } ] } @@ -573,7 +470,7 @@ int main () "location": { "position": { "character": 4, - "line": 6 + "line": 3 }, "uri": "file://INPUT_DIR/test.h" }, @@ -637,7 +534,7 @@ int main () "location": { "position": { "character": 5, - "line": 7 + "line": 4 }, "uri": "file://INPUT_DIR/test.h" }, @@ -659,106 +556,6 @@ int main () "pathComponents": [ "testfunc2" ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "testmarcro1" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:test.h@39@macro@testmarcro1" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 3 - }, - "uri": "file://INPUT_DIR/test.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "testmarcro1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "testmarcro1" - } - ], - "title": "testmarcro1" - }, - "pathComponents": [ - "testmarcro1" - ] - }, - { - "accessLevel": "public", - 
"declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "testmacro2" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:test.h@62@macro@testmacro2" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 4 - }, - "uri": "file://INPUT_DIR/test.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "testmacro2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "testmacro2" - } - ], - "title": "testmacro2" - }, - "pathComponents": [ - "testmacro2" - ] } ] } diff --git a/clang/test/ExtractAPI/emit-symbol-graph/single_file.c b/clang/test/ExtractAPI/emit-symbol-graph/single_file.c index b00b5f5237c9a..feb759f5947bc 100644 --- a/clang/test/ExtractAPI/emit-symbol-graph/single_file.c +++ b/clang/test/ExtractAPI/emit-symbol-graph/single_file.c @@ -15,9 +15,6 @@ // CHECK-NOT: warning: //--- main.c -#define TESTMACRO1 2 -#define TESTMARCRO2 5 - int main () { return 0; @@ -87,7 +84,7 @@ int main () "location": { "position": { "character": 4, - "line": 3 + "line": 0 }, "uri": "file://INPUT_DIR/main.c" }, @@ -109,106 +106,6 @@ int main () "pathComponents": [ "main" ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "TESTMACRO1" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:main.c@8@macro@TESTMACRO1" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 0 - }, - "uri": "file://INPUT_DIR/main.c" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "TESTMACRO1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "TESTMACRO1" - } - ], - "title": "TESTMACRO1" - }, - "pathComponents": [ - "TESTMACRO1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "TESTMARCRO2" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:main.c@29@macro@TESTMARCRO2" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 1 - }, - "uri": "file://INPUT_DIR/main.c" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "TESTMARCRO2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "TESTMARCRO2" - } - ], - "title": "TESTMARCRO2" - }, - "pathComponents": [ - "TESTMARCRO2" - ] } ] } diff --git a/clang/test/ExtractAPI/macros.c b/clang/test/ExtractAPI/macros.c index 10003fe6f6e40..15eb5f6a7f66f 100644 --- a/clang/test/ExtractAPI/macros.c +++ b/clang/test/ExtractAPI/macros.c @@ -1,416 +1,358 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --pretty-sgf --product-name=Macros -target arm64-apple-macosx \ -// RUN: -x objective-c-header %t/input.h -o %t/output.json | FileCheck -allow-empty %s +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing --product-name=Macros -triple arm64-apple-macosx \ +// RUN: -isystem %S -x 
objective-c-header %s -o %t/output.symbols.json -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix HELLO #define HELLO 1 +// HELLO-LABEL: "!testLabel": "c:@macro@HELLO" +// HELLO: "accessLevel": "public", +// HELLO-NEXT: "declarationFragments": [ +// HELLO-NEXT: { +// HELLO-NEXT: "kind": "keyword", +// HELLO-NEXT: "spelling": "#define" +// HELLO-NEXT: }, +// HELLO-NEXT: { +// HELLO-NEXT: "kind": "text", +// HELLO-NEXT: "spelling": " " +// HELLO-NEXT: }, +// HELLO-NEXT: { +// HELLO-NEXT: "kind": "identifier", +// HELLO-NEXT: "spelling": "HELLO" +// HELLO-NEXT: } +// HELLO-NEXT: ], +// HELLO: "kind": { +// HELLO-NEXT: "displayName": "Macro", +// HELLO-NEXT: "identifier": "objective-c.macro" +// HELLO-NEXT: }, +// HELLO-NEXT: "location": { +// HELLO-NEXT: "position": { +// HELLO-NEXT: "character": 8, +// HELLO-NEXT: "line": [[# @LINE - 25]] +// HELLO-NEXT: }, +// HELLO-NEXT: "uri": "file://{{.*}}/macros.c" +// HELLO-NEXT: }, +// HELLO-NEXT: "names": { +// HELLO-NEXT: "navigator": [ +// HELLO-NEXT: { +// HELLO-NEXT: "kind": "identifier", +// HELLO-NEXT: "spelling": "HELLO" +// HELLO-NEXT: } +// HELLO-NEXT: ], +// HELLO-NEXT: "subHeading": [ +// HELLO-NEXT: { +// HELLO-NEXT: "kind": "identifier", +// HELLO-NEXT: "spelling": "HELLO" +// HELLO-NEXT: } +// HELLO-NEXT: ], +// HELLO-NEXT: "title": "HELLO" +// HELLO-NEXT: }, +// HELLO-NEXT: "pathComponents": [ +// HELLO-NEXT: "HELLO" +// HELLO-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix WORLD #define WORLD 2 +// WORLD-LABEL: "!testLabel": "c:@macro@WORLD" +// WORLD: "accessLevel": "public", +// WORLD-NEXT: "declarationFragments": [ +// WORLD-NEXT: { +// WORLD-NEXT: "kind": "keyword", +// WORLD-NEXT: "spelling": "#define" +// WORLD-NEXT: }, +// WORLD-NEXT: { +// WORLD-NEXT: "kind": "text", +// WORLD-NEXT: "spelling": " " +// WORLD-NEXT: }, +// WORLD-NEXT: { +// WORLD-NEXT: "kind": "identifier", +// WORLD-NEXT: "spelling": "WORLD" +// WORLD-NEXT: } +// WORLD-NEXT: ], +// WORLD: "kind": { +// WORLD-NEXT: "displayName": "Macro", +// WORLD-NEXT: "identifier": "objective-c.macro" +// WORLD-NEXT: }, +// WORLD-NEXT: "location": { +// WORLD-NEXT: "position": { +// WORLD-NEXT: "character": 8, +// WORLD-NEXT: "line": [[# @LINE - 25]] +// WORLD-NEXT: }, +// WORLD-NEXT: "uri": "file://{{.*}}/macros.c" +// WORLD-NEXT: }, +// WORLD-NEXT: "names": { +// WORLD-NEXT: "navigator": [ +// WORLD-NEXT: { +// WORLD-NEXT: "kind": "identifier", +// WORLD-NEXT: "spelling": "WORLD" +// WORLD-NEXT: } +// WORLD-NEXT: ], +// WORLD-NEXT: "subHeading": [ +// WORLD-NEXT: { +// WORLD-NEXT: "kind": "identifier", +// WORLD-NEXT: "spelling": "WORLD" +// WORLD-NEXT: } +// WORLD-NEXT: ], +// WORLD-NEXT: "title": "WORLD" +// WORLD-NEXT: }, +// WORLD-NEXT: "pathComponents": [ +// WORLD-NEXT: "WORLD" +// WORLD-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix MACRO_FUN #define MACRO_FUN(x) x x +// MACRO_FUN-LABEL: "!testLabel": "c:@macro@MACRO_FUN" +// MACRO_FUN-NEXT: "accessLevel": "public", +// MACRO_FUN-NEXT: "declarationFragments": [ +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "keyword", +// MACRO_FUN-NEXT: "spelling": "#define" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: { 
+// MACRO_FUN-NEXT: "kind": "text", +// MACRO_FUN-NEXT: "spelling": " " +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "identifier", +// MACRO_FUN-NEXT: "spelling": "MACRO_FUN" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "text", +// MACRO_FUN-NEXT: "spelling": "(" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "internalParam", +// MACRO_FUN-NEXT: "spelling": "x" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "text", +// MACRO_FUN-NEXT: "spelling": ")" +// MACRO_FUN-NEXT: } +// MACRO_FUN-NEXT: ], +// MACRO_FUN: "kind": { +// MACRO_FUN-NEXT: "displayName": "Macro", +// MACRO_FUN-NEXT: "identifier": "objective-c.macro" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: "location": { +// MACRO_FUN-NEXT: "position": { +// MACRO_FUN-NEXT: "character": 8, +// MACRO_FUN-NEXT: "line": [[# @LINE - 37]] +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: "uri": "file://{{.*}}/macros.c" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: "names": { +// MACRO_FUN-NEXT: "navigator": [ +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "identifier", +// MACRO_FUN-NEXT: "spelling": "MACRO_FUN" +// MACRO_FUN-NEXT: } +// MACRO_FUN-NEXT: ], +// MACRO_FUN-NEXT: "subHeading": [ +// MACRO_FUN-NEXT: { +// MACRO_FUN-NEXT: "kind": "identifier", +// MACRO_FUN-NEXT: "spelling": "MACRO_FUN" +// MACRO_FUN-NEXT: } +// MACRO_FUN-NEXT: ], +// MACRO_FUN-NEXT: "title": "MACRO_FUN" +// MACRO_FUN-NEXT: }, +// MACRO_FUN-NEXT: "pathComponents": [ +// MACRO_FUN-NEXT: "MACRO_FUN" +// MACRO_FUN-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix FUN #define FUN(x, y, z) x + y + z +// FUN-LABEL: "!testLabel": "c:@macro@FUN" +// FUN-NEXT: "accessLevel": "public", +// FUN-NEXT: "declarationFragments": [ +// FUN-NEXT: { +// FUN-NEXT: "kind": "keyword", +// FUN-NEXT: "spelling": "#define" +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "text", +// FUN-NEXT: "spelling": " " +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "identifier", +// FUN-NEXT: "spelling": "FUN" +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "text", +// FUN-NEXT: "spelling": "(" +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "internalParam", +// FUN-NEXT: "spelling": "x" +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "text", +// FUN-NEXT: "spelling": ", " +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "internalParam", +// FUN-NEXT: "spelling": "y" +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "text", +// FUN-NEXT: "spelling": ", " +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "internalParam", +// FUN-NEXT: "spelling": "z" +// FUN-NEXT: }, +// FUN-NEXT: { +// FUN-NEXT: "kind": "text", +// FUN-NEXT: "spelling": ")" +// FUN-NEXT: } +// FUN-NEXT: ], +// FUN: "kind": { +// FUN-NEXT: "displayName": "Macro", +// FUN-NEXT: "identifier": "objective-c.macro" +// FUN-NEXT: }, +// FUN-NEXT: "location": { +// FUN-NEXT: "position": { +// FUN-NEXT: "character": 8, +// FUN-NEXT: "line": [[# @LINE - 53]] +// FUN-NEXT: }, +// FUN-NEXT: "uri": "file://{{.*}}/macros.c" +// FUN-NEXT: }, +// FUN-NEXT: "names": { +// FUN-NEXT: "navigator": [ +// FUN-NEXT: { +// FUN-NEXT: "kind": "identifier", +// FUN-NEXT: "spelling": "FUN" +// FUN-NEXT: } +// FUN-NEXT: ], +// FUN-NEXT: "subHeading": [ +// FUN-NEXT: { +// FUN-NEXT: "kind": "identifier", +// FUN-NEXT: "spelling": "FUN" +// FUN-NEXT: } +// FUN-NEXT: ], +// FUN-NEXT: "title": "FUN" +// FUN-NEXT: }, +// FUN-NEXT: "pathComponents": [ +// FUN-NEXT: "FUN" +// FUN-NEXT: ] + 
+// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix FUNC99 #define FUNC99(x, ...) +// FUNC99-LABEL: "!testLabel": "c:@macro@FUNC99" +// FUNC99-NEXT: "accessLevel": "public", +// FUNC99-NEXT: "declarationFragments": [ +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "keyword", +// FUNC99-NEXT: "spelling": "#define" +// FUNC99-NEXT: }, +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "text", +// FUNC99-NEXT: "spelling": " " +// FUNC99-NEXT: }, +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "identifier", +// FUNC99-NEXT: "spelling": "FUNC99" +// FUNC99-NEXT: }, +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "text", +// FUNC99-NEXT: "spelling": "(" +// FUNC99-NEXT: }, +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "internalParam", +// FUNC99-NEXT: "spelling": "x" +// FUNC99-NEXT: }, +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "text", +// FUNC99-NEXT: "spelling": ", ...)" +// FUNC99-NEXT: } +// FUNC99-NEXT: ], +// FUNC99: "kind": { +// FUNC99-NEXT: "displayName": "Macro", +// FUNC99-NEXT: "identifier": "objective-c.macro" +// FUNC99-NEXT: }, +// FUNC99-NEXT: "location": { +// FUNC99-NEXT: "position": { +// FUNC99-NEXT: "character": 8, +// FUNC99-NEXT: "line": [[# @LINE - 37]] +// FUNC99-NEXT: }, +// FUNC99-NEXT: "uri": "file://{{.*}}/macros.c" +// FUNC99-NEXT: }, +// FUNC99-NEXT: "names": { +// FUNC99-NEXT: "navigator": [ +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "identifier", +// FUNC99-NEXT: "spelling": "FUNC99" +// FUNC99-NEXT: } +// FUNC99-NEXT: ], +// FUNC99-NEXT: "subHeading": [ +// FUNC99-NEXT: { +// FUNC99-NEXT: "kind": "identifier", +// FUNC99-NEXT: "spelling": "FUNC99" +// FUNC99-NEXT: } +// FUNC99-NEXT: ], +// FUNC99-NEXT: "title": "FUNC99" +// FUNC99-NEXT: }, +// FUNC99-NEXT: "pathComponents": [ +// FUNC99-NEXT: "FUNC99" +// FUNC99-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix FUNGNU #define FUNGNU(x...) 
+// FUNGNU-LABEL: "!testLabel": "c:@macro@FUNGNU" +// FUNGNU-NEXT: "accessLevel": "public", +// FUNGNU-NEXT: "declarationFragments": [ +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "keyword", +// FUNGNU-NEXT: "spelling": "#define" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "text", +// FUNGNU-NEXT: "spelling": " " +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "identifier", +// FUNGNU-NEXT: "spelling": "FUNGNU" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "text", +// FUNGNU-NEXT: "spelling": "(" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "internalParam", +// FUNGNU-NEXT: "spelling": "x" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "text", +// FUNGNU-NEXT: "spelling": "...)" +// FUNGNU-NEXT: } +// FUNGNU-NEXT: ], +// FUNGNU: "kind": { +// FUNGNU-NEXT: "displayName": "Macro", +// FUNGNU-NEXT: "identifier": "objective-c.macro" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: "location": { +// FUNGNU-NEXT: "position": { +// FUNGNU-NEXT: "character": 8, +// FUNGNU-NEXT: "line": [[# @LINE - 37]] +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: "uri": "file://{{.*}}/macros.c" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: "names": { +// FUNGNU-NEXT: "navigator": [ +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "identifier", +// FUNGNU-NEXT: "spelling": "FUNGNU" +// FUNGNU-NEXT: } +// FUNGNU-NEXT: ], +// FUNGNU-NEXT: "subHeading": [ +// FUNGNU-NEXT: { +// FUNGNU-NEXT: "kind": "identifier", +// FUNGNU-NEXT: "spelling": "FUNGNU" +// FUNGNU-NEXT: } +// FUNGNU-NEXT: ], +// FUNGNU-NEXT: "title": "FUNGNU" +// FUNGNU-NEXT: }, +// FUNGNU-NEXT: "pathComponents": [ +// FUNGNU-NEXT: "FUNGNU" +// FUNGNU-NEXT: ] + +// expected-no-diagnostics -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "Macros", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "HELLO" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@8@macro@HELLO" - }, - "kind": { - "displayName": "Macro", - "identifier": "objective-c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "HELLO" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "HELLO" - } - ], - "title": "HELLO" - }, - "pathComponents": [ - "HELLO" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "WORLD" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@24@macro@WORLD" - }, - "kind": { - "displayName": "Macro", - "identifier": "objective-c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 1 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "WORLD" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "WORLD" - } - ], - "title": "WORLD" - }, - "pathComponents": [ - "WORLD" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MACRO_FUN" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "internalParam", - "spelling": "x" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@40@macro@MACRO_FUN" - }, - "kind": { - "displayName": "Macro", - "identifier": "objective-c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MACRO_FUN" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MACRO_FUN" - } - ], - "title": "MACRO_FUN" - }, - "pathComponents": [ - "MACRO_FUN" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "FUN" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "internalParam", - "spelling": "x" - }, - { - "kind": "text", - "spelling": ", " - }, - { - "kind": "internalParam", - "spelling": "y" - }, - { - "kind": "text", - "spelling": ", " - }, - { - "kind": "internalParam", - "spelling": "z" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@65@macro@FUN" - }, - "kind": { - "displayName": "Macro", - "identifier": "objective-c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - 
"spelling": "FUN" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "FUN" - } - ], - "title": "FUN" - }, - "pathComponents": [ - "FUN" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "FUNC99" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "internalParam", - "spelling": "x" - }, - { - "kind": "text", - "spelling": ", ...)" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@96@macro@FUNC99" - }, - "kind": { - "displayName": "Macro", - "identifier": "objective-c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "FUNC99" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "FUNC99" - } - ], - "title": "FUNC99" - }, - "pathComponents": [ - "FUNC99" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "FUNGNU" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "internalParam", - "spelling": "x" - }, - { - "kind": "text", - "spelling": "...)" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@119@macro@FUNGNU" - }, - "kind": { - "displayName": "Macro", - "identifier": "objective-c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 5 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "FUNGNU" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "FUNGNU" - } - ], - "title": "FUNGNU" - }, - "pathComponents": [ - "FUNGNU" - ] - } - ] -} diff --git a/clang/test/ExtractAPI/submodule-macro.m b/clang/test/ExtractAPI/submodule-macro.m new file mode 100644 index 0000000000000..a41d59a157425 --- /dev/null +++ b/clang/test/ExtractAPI/submodule-macro.m @@ -0,0 +1,25 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: --emit-extension-symbol-graphs --symbol-graph-dir=%t/symbols -isystem %t \ +// RUN: --product-name=Umbrella -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules-cache \ +// RUN: -triple arm64-apple-macosx -x objective-c-header %t/Umbrella.h %t/Subheader.h + +//--- Umbrella.h +#include "Subheader.h" +#import + +//--- Subheader.h +#define FOO 1 + +//--- module.modulemap +module Umbrella { + umbrella header "Umbrella.h" + export * + module * { export * } +} + +// RUN: FileCheck %s --input-file %t/symbols/Umbrella.symbols.json --check-prefix MOD +// MOD-NOT: bool +// MOD: "!testLabel": "c:@macro@FOO" + diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 6ee10976f1207..bc0c3796ca0f6 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -47,7 +47,7 @@ typedef unsigned long long uint64_t; // CHECK-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64 // CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 // CHECK-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]] -// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr 
inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I]] // CHECK: cleanup.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ], [ [[__TAGP_ADDR_0_I]], [[WHILE_BODY_I]] ] @@ -79,7 +79,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) { // CHECK-NEXT: [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64 // CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], -48 // CHECK-NEXT: [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]] -// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I]] // CHECK: cleanup.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ], [ [[__TAGP_ADDR_0_I]], [[WHILE_BODY_I]] ] @@ -120,7 +120,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) { // CHECK-NEXT: [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64 // CHECK-NEXT: [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]] -// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I]] // CHECK: cleanup.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ] @@ -141,7 +141,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48 // CHECK-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]] // CHECK: if.then.i: -// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]] // CHECK-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [ // CHECK-NEXT: i8 120, label [[WHILE_COND_I30_I_PREHEADER:%.*]] @@ -173,7 +173,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 // CHECK-NEXT: [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I_I:%.*]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I40_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I40_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I36_I]] // CHECK: cleanup.i36.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I37_I]] = phi ptr [ [[INCDEC_PTR_I40_I]], [[IF_END31_I_I]] ], [ [[__TAGP_ADDR_0_I31_I]], [[IF_ELSE17_I_I]] ] @@ -195,7 +195,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 // CHECK-NEXT: [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48 // CHECK-NEXT: [[SUB_I_I:%.*]] = add i64 [[ADD_I_I]], [[CONV5_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I_I]] // CHECK: cleanup.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I_I]] = phi ptr [ [[INCDEC_PTR_I_I]], 
[[IF_THEN_I_I]] ], [ [[__TAGP_ADDR_0_I_I]], [[WHILE_BODY_I_I]] ] @@ -216,7 +216,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) { // CHECK-NEXT: [[CONV5_I26_I:%.*]] = zext nneg i8 [[TMP8]] to i64 // CHECK-NEXT: [[ADD_I27_I:%.*]] = add i64 [[MUL_I25_I]], -48 // CHECK-NEXT: [[SUB_I28_I:%.*]] = add i64 [[ADD_I27_I]], [[CONV5_I26_I]] -// CHECK-NEXT: [[INCDEC_PTR_I29_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I29_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I20_I]] // CHECK: cleanup.i20.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I21_I]] = phi ptr [ [[INCDEC_PTR_I29_I]], [[IF_THEN_I24_I]] ], [ [[__TAGP_ADDR_0_I15_I]], [[WHILE_BODY_I18_I]] ] @@ -2367,7 +2367,7 @@ extern "C" __device__ double test_modf(double x, double* y) { // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] // CHECK: if.then.i.i: -// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] // CHECK-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ // CHECK-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] @@ -2399,7 +2399,7 @@ extern "C" __device__ double test_modf(double x, double* y) { // CHECK-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 // CHECK-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I36_I_I]] // CHECK: cleanup.i36.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] @@ -2421,7 +2421,7 @@ extern "C" __device__ double test_modf(double x, double* y) { // CHECK-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 // CHECK-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // CHECK-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I_I_I]] // CHECK: cleanup.i.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] @@ -2442,7 +2442,7 @@ extern "C" __device__ double test_modf(double x, double* y) { // CHECK-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 // CHECK-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 // CHECK-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I20_I_I]] // CHECK: cleanup.i20.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ 
[[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] @@ -2466,7 +2466,7 @@ extern "C" __device__ float test_nanf(const char *tag) { // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] // CHECK: if.then.i.i: -// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1 // CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] // CHECK-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ // CHECK-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] @@ -2498,7 +2498,7 @@ extern "C" __device__ float test_nanf(const char *tag) { // CHECK-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 // CHECK-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] // CHECK-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I36_I_I]] // CHECK: cleanup.i36.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] @@ -2520,7 +2520,7 @@ extern "C" __device__ float test_nanf(const char *tag) { // CHECK-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 // CHECK-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 // CHECK-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I_I_I]] // CHECK: cleanup.i.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] @@ -2541,7 +2541,7 @@ extern "C" __device__ float test_nanf(const char *tag) { // CHECK-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 // CHECK-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 // CHECK-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 +// CHECK-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 // CHECK-NEXT: br label [[CLEANUP_I20_I_I]] // CHECK: cleanup.i20.i.i: // CHECK-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] @@ -2862,7 +2862,7 @@ extern "C" __device__ double test_normcdfinv(double x) { // DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] -// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4 +// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], 
label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] // DEFAULT: _ZL5normfiPKf.exit: @@ -2882,7 +2882,7 @@ extern "C" __device__ double test_normcdfinv(double x) { // FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]] -// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4 +// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] // FINITEONLY: _ZL5normfiPKf.exit: @@ -2902,7 +2902,7 @@ extern "C" __device__ double test_normcdfinv(double x) { // APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] -// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4 +// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]] // APPROX: _ZL5normfiPKf.exit: @@ -2926,7 +2926,7 @@ extern "C" __device__ float test_normf(int x, const float *y) { // DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] -// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8 +// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] // DEFAULT: _ZL4normiPKd.exit: @@ -2946,7 +2946,7 @@ extern "C" __device__ float test_normf(int x, const float *y) { // FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]] -// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8 +// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] // FINITEONLY: _ZL4normiPKd.exit: @@ -2966,7 +2966,7 @@ extern "C" __device__ float test_normf(int x, const float *y) { // APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] -// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8 +// 
APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]] // APPROX: _ZL4normiPKd.exit: @@ -3286,7 +3286,7 @@ extern "C" __device__ double test_rint(double x) { // DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] -// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4 +// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] // DEFAULT: _ZL6rnormfiPKf.exit: @@ -3306,7 +3306,7 @@ extern "C" __device__ double test_rint(double x) { // FINITEONLY-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]] -// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4 +// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] // FINITEONLY: _ZL6rnormfiPKf.exit: @@ -3326,7 +3326,7 @@ extern "C" __device__ double test_rint(double x) { // APPROX-NEXT: [[TMP0:%.*]] = load float, ptr [[__A_ADDR_0_I3]], align 4, !tbaa [[TBAA16]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]] -// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 4 +// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]] // APPROX: _ZL6rnormfiPKf.exit: @@ -3350,7 +3350,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] // DEFAULT-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // DEFAULT-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] -// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8 +// DEFAULT-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // DEFAULT-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // DEFAULT-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // DEFAULT: _ZL5rnormiPKd.exit: @@ -3370,7 +3370,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // FINITEONLY-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] // FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract double [[TMP0]], [[TMP0]] // FINITEONLY-NEXT: 
[[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]] -// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8 +// FINITEONLY-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // FINITEONLY-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // FINITEONLY-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // FINITEONLY: _ZL5rnormiPKd.exit: @@ -3390,7 +3390,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) { // APPROX-NEXT: [[TMP0:%.*]] = load double, ptr [[__A_ADDR_0_I3]], align 8, !tbaa [[TBAA18]] // APPROX-NEXT: [[MUL_I:%.*]] = fmul contract double [[TMP0]], [[TMP0]] // APPROX-NEXT: [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]] -// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, ptr [[__A_ADDR_0_I3]], i64 8 +// APPROX-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8 // APPROX-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0 // APPROX-NEXT: br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]] // APPROX: _ZL5rnormiPKd.exit: diff --git a/clang/test/OpenMP/bug60602.cpp b/clang/test/OpenMP/bug60602.cpp index cb2e4e5b11e33..0789ef958e523 100644 --- a/clang/test/OpenMP/bug60602.cpp +++ b/clang/test/OpenMP/bug60602.cpp @@ -58,13 +58,13 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) { // CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i64 0 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP8]] to i64 // CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[CONV]], 4 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP11]], i64 0 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 // CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[CONV2]], 4 @@ -134,13 +134,13 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) { // CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP47:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP48:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i64 0 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP48]], i64 0 // CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[TMP49]] to i64 // CHECK-NEXT: [[TMP50:%.*]] = mul nuw i64 [[CONV5]], 4 // CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i64 0 +// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP52]], i64 0 // CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: [[CONV7:%.*]] = 
sext i32 [[TMP53]] to i64 // CHECK-NEXT: [[TMP54:%.*]] = mul nuw i64 [[CONV7]], 4 diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp index 52d5ceffa1471..d2954b7a74821 100644 --- a/clang/test/OpenMP/declare_mapper_codegen.cpp +++ b/clang/test/OpenMP/declare_mapper_codegen.cpp @@ -129,7 +129,7 @@ class C { // CK0-DAG: [[BBEGIN:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1 // CK0-DAG: [[BBEGIN2:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1 // CK0-DAG: [[BARRBEGIN:%.+]] = load ptr, ptr [[BBEGIN2]] -// CK0-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0 +// CK0-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds nuw double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0 // CK0-DAG: [[BEND:%.+]] = getelementptr ptr, ptr [[BBEGIN]], i32 1 // CK0-DAG: [[ABEGINI:%.+]] = ptrtoint ptr [[ABEGIN]] to i64 // CK0-DAG: [[BENDI:%.+]] = ptrtoint ptr [[BEND]] to i64 @@ -965,7 +965,7 @@ class C { // CK4-DAG: [[BBEGIN:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1 // CK4-DAG: [[BBEGIN2:%.+]] = getelementptr inbounds nuw %class.C, ptr [[PTR]], i32 0, i32 1 // CK4-DAG: [[BARRBEGIN:%.+]] = load ptr, ptr [[BBEGIN2]] -// CK4-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0 +// CK4-DAG: [[BARRBEGINGEP:%.+]] = getelementptr inbounds nuw double, ptr [[BARRBEGIN]], i[[sz:64|32]] 0 // CK4-DAG: [[BEND:%.+]] = getelementptr ptr, ptr [[BBEGIN]], i32 1 // CK4-DAG: [[ABEGINI:%.+]] = ptrtoint ptr [[ABEGIN]] to i64 // CK4-DAG: [[BENDI:%.+]] = ptrtoint ptr [[BEND]] to i64 diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index ea619cb6e0f26..6c588ba25db30 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -662,24 +662,24 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw 
float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -1574,21 +1574,21 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]] // CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]] // CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]] // CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] // CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]] // CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: @@ -2252,24 +2252,24 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, 
ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK17-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: @@ -2790,21 +2790,21 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]] // CHECK19-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]] // CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] 
-// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]] // CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] // CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]] // CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp index b019b4ff92ad5..93a6779ac02e8 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -175,16 +175,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 0 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP7]], i64 9 // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] @@ -214,7 +214,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] // CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 // CHECK1-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -229,19 +229,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP32]], i64 0 // CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i64 0 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP36]], i64 9 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 @@ -562,9 +562,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp index f7353172e235c..ad93fd6030ac7 100644 --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -706,24 +706,24 @@ int fint(void) { return 
ftemplate(); } // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -1682,21 +1682,21 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]] // CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]] // CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], 
align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]] // CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] // CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]] // CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: @@ -2664,24 +2664,24 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] 
// CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: @@ -3671,21 +3671,21 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]] // CHECK7-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]] // CHECK7-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]] // CHECK7-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] // CHECK7-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]] // CHECK7-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: @@ -4290,24 +4290,24 @@ int fint(void) { return ftemplate(); } // CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[IDXPROM]] // CHECK9-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM1]] +// CHECK9-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 
[[IDXPROM1]] // CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[MUL3:%.*]] = fmul float [[TMP6]], [[TMP9]] // CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM4]] +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM4]] // CHECK9-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP12]] // CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM7]] // CHECK9-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: @@ -4606,21 +4606,21 @@ int fint(void) { return ftemplate(); } // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 [[TMP5]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 [[TMP5]] // CHECK11-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 [[TMP8]] // CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[MUL2:%.*]] = fmul float [[TMP6]], [[TMP9]] // CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP11]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 [[TMP11]] // CHECK11-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP12]] // CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds 
nuw float, ptr [[TMP13]], i32 [[TMP14]] // CHECK11-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: @@ -4928,24 +4928,24 @@ int fint(void) { return ftemplate(); } // CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[IDXPROM]] // CHECK13-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM1]] +// CHECK13-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM1]] // CHECK13-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[MUL3:%.*]] = fmul float [[TMP6]], [[TMP9]] // CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM4]] +// CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM4]] // CHECK13-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP12]] // CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM7]] +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM7]] // CHECK13-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: @@ -5275,21 +5275,21 @@ int fint(void) { return ftemplate(); } // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 [[TMP5]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 [[TMP5]] // CHECK15-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 [[TMP8]] +// CHECK15-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 [[TMP8]] // CHECK15-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[MUL2:%.*]] = fmul float [[TMP6]], [[TMP9]] // CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP11]] +// CHECK15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 [[TMP11]] // CHECK15-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP12]] // CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] +// CHECK15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 [[TMP14]] // CHECK15-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: @@ -5782,24 +5782,24 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK17-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: @@ -6373,21 +6373,21 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]] // CHECK19-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]] // CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]] // CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] // CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]] // CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: @@ -6970,24 +6970,24 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK21-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, 
!llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK21-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK21-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK21-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK21-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: @@ -7592,21 +7592,21 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 [[TMP15]] // CHECK23-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i32 [[TMP18]] // CHECK23-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i32 [[TMP21]] // CHECK23-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] // 
CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] // CHECK23-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i32 [[TMP24]] // CHECK23-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp index 395ccdbeed763..5a21fe8509fd3 100644 --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -650,7 +650,7 @@ int main() { // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], [[MUL9]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[LVAR5]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP18]], i32 1 // CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR4]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP6]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp index ea32e98bf1423..83632db238484 100644 --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -1021,14 +1021,14 @@ int main() { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX]], i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] // CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] @@ -1054,16 +1054,16 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] // CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x 
%struct.S], ptr [[ARRAYIDX8]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN11:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]] // CHECK1-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX12]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY13]], i64 2 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[ARRAYDECAY13]], i64 2 // CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 // CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 // CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] @@ -1580,10 +1580,10 @@ int main() { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = mul nsw i64 1, [[TMP1]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 1, [[TMP1]] // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP4]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX4]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX4]], i64 1 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr [[ARR6]], i32 0, i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] @@ -1757,13 +1757,13 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 4 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 6 // CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr 
[[ARRAYIDX3]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] @@ -1963,11 +1963,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 6 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] @@ -2148,13 +2148,13 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 1 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 6 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] @@ -2335,13 +2335,13 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP2]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// 
CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 1 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP4]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 @@ -2459,8 +2459,8 @@ int main() { // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [5 x %struct.S], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [5 x %struct.S], ptr [[TMP0]], i64 0, i64 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 5 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] @@ -2641,9 +2641,9 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] @@ -2826,9 +2826,9 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] @@ -3012,9 +3012,9 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 
4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S], ptr [[TMP3]], i64 0, i64 3 // CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 // CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] @@ -3974,8 +3974,8 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 40 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.0], ptr [[ARR4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 40 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp index 16d6c23542fce..82f94c949eea6 100644 --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -1074,14 +1074,14 @@ int main() { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX]], i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] // CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] @@ -1109,16 +1109,16 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] // CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]] -// 
CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX9]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN12:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]] // CHECK1-NEXT: [[ARRAYDECAY14:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX13]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDECAY14]], i64 2 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0]], ptr [[ARRAYDECAY14]], i64 2 // CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX15]] to i64 // CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 // CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] @@ -1669,13 +1669,13 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP2]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0:%.*]], ptr [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 4 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP4]], i64 6 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0]], ptr [[TMP4]], i64 6 // CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] @@ -1877,8 +1877,8 @@ int main() { // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = 
getelementptr inbounds nuw [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 5 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] @@ -2066,9 +2066,9 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x %struct.S.0], ptr [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP3]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [4 x %struct.S.0], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR34]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] @@ -2979,8 +2979,8 @@ int main() { // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [42 x %struct.S], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [42 x %struct.S], ptr [[TMP0]], i64 0, i64 40 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp index ea93323de77d0..b875279c2a144 100644 --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -68,16 +68,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = 
getelementptr inbounds nuw ptr, ptr [[TMP6]], i64 9 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] @@ -107,7 +107,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]] // CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -122,19 +122,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP28]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP32]], i64 0 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = sext i32 [[TMP33]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP34]] // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP35]], i64 9 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP36]], i64 [[LB_ADD_LEN10]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP37]], align 8 @@ 
-459,9 +459,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/for_scan_codegen.cpp b/clang/test/OpenMP/for_scan_codegen.cpp index 4cf18a76fbfef..61e6534db471e 100644 --- a/clang/test/OpenMP/for_scan_codegen.cpp +++ b/clang/test/OpenMP/for_scan_codegen.cpp @@ -39,13 +39,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:.+]] @@ -72,13 +72,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: 
[[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -132,13 +132,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE:[^,]+]] @@ -179,13 +179,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:[^,]+]] @@ -217,13 +217,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = 
getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -280,13 +280,13 @@ void baz(int n) { // CHECK: [[IF_THEN]]: // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE]] diff --git a/clang/test/OpenMP/for_simd_scan_codegen.cpp b/clang/test/OpenMP/for_simd_scan_codegen.cpp index 29af5f74c5b5b..829f2656042fb 100644 --- a/clang/test/OpenMP/for_simd_scan_codegen.cpp +++ b/clang/test/OpenMP/for_simd_scan_codegen.cpp @@ -39,13 +39,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:.+]] @@ -72,13 +72,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: 
[[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -132,13 +132,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE:[^,]+]] @@ -179,13 +179,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:[^,]+]] @@ -217,13 +217,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: 
[[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -280,13 +280,13 @@ void baz(int n) { // CHECK: [[IF_THEN]]: // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE]] diff --git a/clang/test/OpenMP/irbuilder_for_iterator.cpp b/clang/test/OpenMP/irbuilder_for_iterator.cpp index 0098a7db575c3..ec1c3af744b49 100644 --- a/clang/test/OpenMP/irbuilder_for_iterator.cpp +++ b/clang/test/OpenMP/irbuilder_for_iterator.cpp @@ -78,18 +78,18 @@ extern "C" void workshareloop_iterator(float *a, float *b, float *c) { // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM]] +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM2]] // CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP9]], [[TMP12]] // CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK-NEXT: 
[[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM4]] // CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: diff --git a/clang/test/OpenMP/irbuilder_for_rangefor.cpp b/clang/test/OpenMP/irbuilder_for_rangefor.cpp index 45b34621afbb9..86a043e638bc3 100644 --- a/clang/test/OpenMP/irbuilder_for_rangefor.cpp +++ b/clang/test/OpenMP/irbuilder_for_rangefor.cpp @@ -94,18 +94,18 @@ extern "C" void workshareloop_rangefor(float *a, float *b, float *c) { // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM]] +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM2]] // CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP12]], [[TMP15]] // CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM4]] // CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: diff --git a/clang/test/OpenMP/irbuilder_for_unsigned.c b/clang/test/OpenMP/irbuilder_for_unsigned.c index b0043b823ac85..675871a87b3bd 100644 --- a/clang/test/OpenMP/irbuilder_for_unsigned.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned.c @@ -65,24 +65,24 @@ extern "C" void workshareloop_unsigned(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM]] +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM2]] // CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP11]], [[TMP14]] // CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], 
align 4
 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM4]]
+// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP15]], i64 [[IDXPROM4]]
 // CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP17]]
 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP19]] to i64
-// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM7]]
+// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP18]], i64 [[IDXPROM7]]
 // CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT: br label [[OMP_LOOP_INC]]
 // CHECK: omp_loop.inc:
diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_auto.c b/clang/test/OpenMP/irbuilder_for_unsigned_auto.c
index 19b2770bfa2df..39ede3ef971d0 100644
--- a/clang/test/OpenMP/irbuilder_for_unsigned_auto.c
+++ b/clang/test/OpenMP/irbuilder_for_unsigned_auto.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_auto(float *a, float *b, float *c, float
 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]]
 // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]]
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]]
 // CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]]
 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]]
+// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]]
 // CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]]
 // CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]]
+// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]]
 // CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT: br label [[OMP_LOOP_INC]]
 // CHECK: omp_loop.inc:
diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_down.c b/clang/test/OpenMP/irbuilder_for_unsigned_down.c
index 6e179826a6efa..5515f086c34a7 100644
--- a/clang/test/OpenMP/irbuilder_for_unsigned_down.c
+++ b/clang/test/OpenMP/irbuilder_for_unsigned_down.c
@@ -67,7 +67,7 @@ extern "C" void workshareloop_unsigned_down(float *a) {
 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP11]] to i64
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM]]
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM]]
 // CHECK-NEXT: store float [[CONV]], ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT: br label [[OMP_LOOP_INC]]
 // CHECK: omp_loop.inc:
diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c
index 8f3297061938b..f20b60e608d2f 100644
--- a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c
+++ b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_dynamic(float *a, float *b, float *c, flo
 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]]
 // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8
 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]]
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]]
 // CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]]
 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]]
+// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]]
 // CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 // CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]]
 // CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]]
+// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]]
 // CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4
 // CHECK-NEXT: br label [[OMP_LOOP_INC]]
 // CHECK: omp_loop.inc:
diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c
index c2b0948bf7aeb..599f256243b11 100644
--- a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c
+++ b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c
@@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_dynamic_chunked(float *a, float *b, float
 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr 
[[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]] // CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]] // CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] // CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]] // CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c b/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c index 68becf9f694ad..c27bcba155910 100644 --- a/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c @@ -66,24 +66,24 @@ extern "C" void workshareloop_unsigned_runtime(float *a, float *b, float *c, flo // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[IDXPROM2]] // CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[IDXPROM4]] // CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], 
[[TMP11]] // CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[IDXPROM7]] // CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c b/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c index 71fb6b5473da8..b937568ca9f11 100644 --- a/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c @@ -108,24 +108,24 @@ extern "C" void workshareloop_unsigned_static_chunked(float *a, float *b, float // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM]] // CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM2]] // CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 // CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP19]], [[TMP22]] // CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM4]] // CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 // CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP25]] // CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP27]] to i64 -// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM7]] +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP26]], i64 [[IDXPROM7]] // CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC]] // CHECK: omp_loop.inc: diff --git a/clang/test/OpenMP/map_struct_ordering.cpp b/clang/test/OpenMP/map_struct_ordering.cpp index d5b22d8ff2a4d..a52ddad465f37 100644 --- a/clang/test/OpenMP/map_struct_ordering.cpp +++ b/clang/test/OpenMP/map_struct_ordering.cpp @@ -57,7 +57,7 @@ int map_struct() { // CHECK-NEXT: [[DATUM:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0 // CHECK-NEXT: [[DATUM2:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATUM2]], align 8 -// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], 
i64 0 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 1 // CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 // CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DAT]] to i64 diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp index e90f0783787c0..7d467293d0c8f 100644 --- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -76,7 +76,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -91,7 +91,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1 @@ -106,7 +106,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1 @@ -124,7 +124,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]]) // CHECK1-NEXT: store ptr [[TMP25]], ptr [[DOTTASK_RED_]], 
align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 @@ -139,7 +139,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..6, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 diff --git a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp index 180ff3a94d24c..b0652c843845c 100644 --- a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp @@ -84,9 +84,9 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB1:.+]], ptr [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) -// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0 +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % -// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], // CHECK-DAG: [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], @@ -138,10 +138,10 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB4:.+]], ptr [[TMP59]], // CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6 // CHECK-DAG: store i32 1, ptr [[TMP60]], -// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr 
inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 // CHECK: [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]]) // CHECK: [[TMP63:%.*]] = load i32, ptr [[N]], // CHECK: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]], diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp index 2061da7c4d781..b0d00c5f539b1 100644 --- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp @@ -76,7 +76,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -91,7 +91,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1 @@ -106,7 +106,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK1-NEXT: 
[[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1 @@ -124,7 +124,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]]) // CHECK1-NEXT: store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 @@ -139,7 +139,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..6, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 diff --git a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp index a69844dc4dee2..7def61251b24e 100644 --- a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp @@ -80,9 +80,9 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB1:.+]], ptr [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) -// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0 +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % -// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], // CHECK-DAG: [[TMP28]] = getelementptr inbounds nuw 
%struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], @@ -134,10 +134,10 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB4:.+]], ptr [[TMP59]], // CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6 // CHECK-DAG: store i32 1, ptr [[TMP60]], -// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 // CHECK: [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]]) // CHECK: [[TMP63:%.*]] = load i32, ptr [[N]], // CHECK: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]], diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp index 0a73eaefc9808..67285cfaef34d 100644 --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -255,21 +255,21 @@ void foo_simd(int low, int up) { // CHECK1-NEXT: call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP7]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP7]] // CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[TMP10]] // CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 // CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP8]], [[TMP11]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP13]] // CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP14]] // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP16]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds nuw float, ptr [[TMP15]], i64 [[TMP16]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK1-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -485,24 +485,24 @@ void foo_simd(int low, int up) { // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-NEXT: [[IDXPROM7:%.*]] = zext i8 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM7]] // CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: [[MUL9:%.*]] = fmul float [[TMP10]], [[TMP13]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-NEXT: [[IDXPROM10:%.*]] = zext i8 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM10]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX11]], align 4 // CHECK1-NEXT: [[MUL12:%.*]] = fmul float [[MUL9]], [[TMP16]] // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-NEXT: [[IDXPROM13:%.*]] = zext i8 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM13]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM13]] // CHECK1-NEXT: store float [[MUL12]], ptr [[ARRAYIDX14]], align 4 // CHECK1-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -866,21 +866,21 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK1-IRBUILDER-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP6:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 [[TMP6]] // CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[TMP9]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP9]] // CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK1-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] // CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 // CHECK1-IRBUILDER-NEXT: [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]] // CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK1-IRBUILDER-NEXT: store float [[MUL7]], ptr [[ARRAYIDX8]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: @@ -1110,24 +1110,24 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]] // CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM9]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM9]] // CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 // CHECK1-IRBUILDER-NEXT: [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]] // CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-IRBUILDER-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM12]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM12]] // CHECK1-IRBUILDER-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX13]], align 4 // CHECK1-IRBUILDER-NEXT: [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]] // CHECK1-IRBUILDER-NEXT: [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-IRBUILDER-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-IRBUILDER-NEXT: [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM15]] +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM15]] // CHECK1-IRBUILDER-NEXT: store float [[MUL14]], ptr [[ARRAYIDX16]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: @@ -1495,21 +1495,21 @@ void foo_simd(int low, int up) { // CHECK3-NEXT: call void @__kmpc_ordered(ptr @[[GLOB1]], 
i32 [[TMP0]]) // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP7]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP7]] // CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP9]], i64 [[TMP10]] // CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 // CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP8]], [[TMP11]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP13]] // CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP14]] // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP16]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP15]], i64 [[TMP16]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -1725,24 +1725,24 @@ void foo_simd(int low, int up) { // CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP9]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM]] // CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-NEXT: [[IDXPROM7:%.*]] = zext i8 [[TMP12]] to i64 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM7]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM7]] // CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 // CHECK3-NEXT: [[MUL9:%.*]] = fmul float [[TMP10]], [[TMP13]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-NEXT: [[IDXPROM10:%.*]] = zext i8 [[TMP15]] to i64 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM10]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM10]] // CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX11]], align 4 // CHECK3-NEXT: [[MUL12:%.*]] = fmul float [[MUL9]], [[TMP16]] // CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-NEXT: [[IDXPROM13:%.*]] = zext i8 
[[TMP18]] to i64 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM13]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM13]] // CHECK3-NEXT: store float [[MUL12]], ptr [[ARRAYIDX14]], align 4 // CHECK3-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -2106,21 +2106,21 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK3-IRBUILDER-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP6:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 [[TMP6]] // CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[TMP9]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP9]] // CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK3-IRBUILDER-NEXT: [[MUL5:%.*]] = fmul float [[TMP7]], [[TMP10]] // CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 // CHECK3-IRBUILDER-NEXT: [[MUL7:%.*]] = fmul float [[MUL5]], [[TMP13]] // CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK3-IRBUILDER-NEXT: store float [[MUL7]], ptr [[ARRAYIDX8]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: @@ -2350,24 +2350,24 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP8]] to i64 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[IDXPROM]] // CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-IRBUILDER-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP11]] to i64 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM9]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX10:%.*]] = getelementptr 
inbounds nuw float, ptr [[TMP10]], i64 [[IDXPROM9]] // CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 // CHECK3-IRBUILDER-NEXT: [[MUL11:%.*]] = fmul float [[TMP9]], [[TMP12]] // CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-IRBUILDER-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM12]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM12]] // CHECK3-IRBUILDER-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX13]], align 4 // CHECK3-IRBUILDER-NEXT: [[MUL14:%.*]] = fmul float [[MUL11]], [[TMP15]] // CHECK3-IRBUILDER-NEXT: [[TMP16:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK3-IRBUILDER-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-IRBUILDER-NEXT: [[IDXPROM15:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM15]] +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM15]] // CHECK3-IRBUILDER-NEXT: store float [[MUL14]], ptr [[ARRAYIDX16]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: @@ -2674,21 +2674,21 @@ void foo_simd(int low, int up) { // CHECK5: for.body: // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[I]], align 8 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[TMP2]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i64 [[TMP2]] // CHECK5-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[I]], align 8 -// CHECK5-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP5]] +// CHECK5-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP5]] // CHECK5-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], [[TMP6]] // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[I]], align 8 -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP8]] // CHECK5-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 // CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[MUL]], [[TMP9]] // CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[I]], align 8 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP11]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 [[TMP11]] // CHECK5-NEXT: store float [[MUL3]], ptr [[ARRAYIDX4]], align 4 // CHECK5-NEXT: br label [[FOR_INC:%.*]] // CHECK5: for.inc: @@ -2804,24 +2804,24 @@ void foo_simd(int low, int up) { // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i8, ptr [[I]], align 1 // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP3]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds float, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[IDXPROM]] // CHECK5-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[I]], align 1 // CHECK5-NEXT: [[IDXPROM4:%.*]] = zext i8 [[TMP6]] to i64 -// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[IDXPROM4]] +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 [[IDXPROM4]] // CHECK5-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = fmul float [[TMP4]], [[TMP7]] // CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[TMP9:%.*]] = load i8, ptr [[I]], align 1 // CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP9]] to i64 -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM6]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[IDXPROM6]] // CHECK5-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 // CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL]], [[TMP10]] // CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[I]], align 1 // CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP12]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM9]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[IDXPROM9]] // CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK5-NEXT: br label [[FOR_INC:%.*]] // CHECK5: for.inc: diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 2dec32d71b91a..c7afae419509b 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -665,24 +665,24 @@ void range_for_collapsed() { // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // 
CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -779,21 +779,21 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]] // CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -882,21 +882,21 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group 
[[ACC_GRP8]] // CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]] // CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -1159,24 +1159,24 @@ void range_for_collapsed() { // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]] // CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]] // CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] // 
CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]] // CHECK1-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -1303,7 +1303,7 @@ void range_for_collapsed() { // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -1312,7 +1312,7 @@ void range_for_collapsed() { // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]] // CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] // CHECK1-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 @@ -1781,24 +1781,24 @@ void range_for_collapsed() { // CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 // CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK2-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK2-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ -1895,21 +1895,21 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] // CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]] // CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] // CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]] // CHECK2-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ -1998,21 +1998,21 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] 
+// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] // CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]] // CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] // CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]] // CHECK2-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ -2275,24 +2275,24 @@ void range_for_collapsed() { // CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]] // CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]] // CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] // CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]] // CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] // CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds 
float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]] // CHECK2-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: @@ -2419,7 +2419,7 @@ void range_for_collapsed() { // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]] // CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] // CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -2428,7 +2428,7 @@ void range_for_collapsed() { // CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 // CHECK2-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]] // CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] // CHECK2-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 @@ -2897,24 +2897,24 @@ void range_for_collapsed() { // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45:![0-9]+]] // CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]] // CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG45]] // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]] // CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG45]] // CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG45]] // CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]] // CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG45]] // CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG45]] // 
CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]] // CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG46:![0-9]+]] // CHECK5: omp.body.continue: @@ -3011,21 +3011,21 @@ void range_for_collapsed() { // CHECK5-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]] // CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]] // CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG56]] // CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]] // CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG56]] // CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]] // CHECK5-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]] // CHECK5: omp.body.continue: @@ -3114,21 +3114,21 @@ void range_for_collapsed() { // CHECK5-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group 
[[ACC_GRP67]] // CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]] // CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]] // CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG68]] // CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]] // CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG68]] // CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]] // CHECK5-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG69:![0-9]+]] // CHECK5: omp.body.continue: @@ -3391,24 +3391,24 @@ void range_for_collapsed() { // CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]] // CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 
[[IDXPROM6]], !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]] // CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG97]] // CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]] // CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG97]] // CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]] // CHECK5-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK5: omp.body.continue: @@ -3535,7 +3535,7 @@ void range_for_collapsed() { // CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG111]] // CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG111]] @@ -3544,7 +3544,7 @@ void range_for_collapsed() { // CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG111]] // CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]], !dbg [[DBG111]] +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]], !dbg [[DBG111]] // CHECK5-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]] @@ -4013,24 +4013,24 @@ void range_for_collapsed() { // CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], 
align 4 // CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[IDXPROM]] // CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 // CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[IDXPROM3]] // CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] // CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 // CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[IDXPROM6]] // CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 // CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] // CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 // CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP23]], i64 [[IDXPROM9]] // CHECK6-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: @@ -4127,21 +4127,21 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK6-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] // CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]] // CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 
4, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] // CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]] // CHECK6-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: @@ -4230,21 +4230,21 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP12]] // CHECK6-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP15]] // CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] // CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 [[TMP18]] // CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] // CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP20]], i64 [[TMP21]] // CHECK6-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: @@ -4507,24 +4507,24 @@ void range_for_collapsed() { // CHECK6-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[IDXPROM]] // CHECK6-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM6]] // CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] // CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP19]], i64 [[IDXPROM9]] // CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] // CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i64 [[IDXPROM12]] // CHECK6-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: @@ -4649,7 +4649,7 @@ void range_for_collapsed() { // CHECK6-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float // CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 // CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[VLA1]], i64 [[IDXPROM]] // CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 // CHECK6-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] // CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -4658,7 +4658,7 @@ void range_for_collapsed() { // CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 // CHECK6-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] +// CHECK6-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP16]], i64 [[IDXPROM7]] // CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 // CHECK6-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] // CHECK6-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 diff --git a/clang/test/OpenMP/parallel_for_linear_codegen.cpp b/clang/test/OpenMP/parallel_for_linear_codegen.cpp index 8b46797ae253f..15eb0dfa42af5 100644 --- a/clang/test/OpenMP/parallel_for_linear_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_linear_codegen.cpp @@ -337,7 +337,7 @@ int main() { // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]] // 
CHECK1-NEXT: store i32 [[ADD7]], ptr [[LVAR3]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP16]], i32 1 // CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR2]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[LVAR3]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp index 752aec788bf34..59d169d7a1738 100644 --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -83,16 +83,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -122,7 +122,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -137,19 +137,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = 
getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8 @@ -470,9 +470,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/parallel_for_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_scan_codegen.cpp index 161534814a793..67b32407c712f 100644 --- a/clang/test/OpenMP/parallel_for_scan_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_scan_codegen.cpp @@ -28,9 +28,9 @@ void baz(int n) { // CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call( // CHECK: [[LAST:%.+]] = mul nsw i64 9, % - // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[LAST]] + // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[LAST]] // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 @_ZZ3baziE1a, ptr align 4 [[LAST_REF]], i64 %{{.+}}, i1 false) - // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 9 + // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 9 // CHECK: [[LAST_VAL:%.+]] = load double, ptr [[LAST_REF_B]], // CHECK: store double [[LAST_VAL]], ptr @_ZZ3baziE1b, @@ -58,13 +58,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:.+]] @@ -91,13 +91,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -151,13 +151,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = 
getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE:[^,]+]] @@ -188,13 +188,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:[^,]+]] @@ -226,13 +226,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -289,13 +289,13 @@ void baz(int n) { // CHECK: [[IF_THEN]]: // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1 // CHECK: [[IDX:%.+]] = mul nsw i64 
[[BASE_IDX_SUB_1]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE]] diff --git a/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp index 7e973a602a65c..cac997753d480 100644 --- a/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp @@ -51,13 +51,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:.+]] @@ -84,13 +84,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr 
[[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -144,13 +144,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE:[^,]+]] @@ -181,13 +181,13 @@ void baz(int n) { // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]], // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF:%.+]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false) // b_buffer[i] = b_priv; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]] // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]], // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]], // CHECK: br label %[[LOOP_CONTINUE:[^,]+]] @@ -219,13 +219,13 @@ void baz(int n) { // a_buffer[i] += a_buffer[i-pow(2, k)]; // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]] + // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[I]] // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]] - // 
CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] + // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]] // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]] // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]] // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]] @@ -282,13 +282,13 @@ void baz(int n) { // CHECK: [[IF_THEN]]: // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]] - // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]] - // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 + // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[A_BUF]], i64 [[IDX]] + // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds nuw [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false) // b_priv = b_buffer[i]; - // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] + // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds nuw double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]] // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]], // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]], // CHECK: br label %[[SCAN_PHASE]] diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp index b17757a5f978a..c1fe00f238001 100644 --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -72,16 +72,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -111,7 +111,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = 
getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -126,19 +126,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 @@ -425,9 +425,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr 
inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp index 3c74aaca3f46d..1d106922435d5 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp @@ -84,9 +84,9 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB1:.+]], ptr [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) -// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0 +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % -// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], // CHECK-DAG: [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], @@ -138,10 +138,10 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB4:.+]], ptr [[TMP59]], // CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6 // CHECK-DAG: store i32 1, ptr [[TMP60]], -// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 // CHECK: [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0:%.+]], i32 4, ptr [[DOTRD_INPUT_]]) // CHECK: [[TMP63:%.*]] = load i32, ptr [[N:%.+]], // CHECK: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]], diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp index 7d4216ddde6a3..9a524c3b94c6e 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp +++ 
b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp @@ -84,9 +84,9 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB1:.+]], ptr [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) -// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0 +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C:%.+]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % -// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], // CHECK-DAG: [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], @@ -138,10 +138,10 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB4:.+]], ptr [[TMP59]], // CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6 // CHECK-DAG: store i32 1, ptr [[TMP60]], -// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 // CHECK: [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0:%.+]], i32 4, ptr [[DOTRD_INPUT_]]) // CHECK: [[TMP63:%.*]] = load i32, ptr [[N:%.+]], // CHECK: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]], diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp index f49faa6b89deb..ce76429b871fe 100644 --- a/clang/test/OpenMP/parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp @@ -354,9 +354,9 @@ int main() { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i64 0 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 
0 // CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] @@ -1632,9 +1632,9 @@ int main() { // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i64 0 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 +// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 0 // CHECK3-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK3-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK3-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] @@ -2142,9 +2142,9 @@ int main() { // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i64 0 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 0 // CHECK4-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK4-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK4-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp index 208f7a41aa3db..40cc3103b1c0f 100644 --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -72,16 +72,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 
// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -111,7 +111,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -126,19 +126,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 @@ -416,9 +416,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// 
CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp index 6d73652c3ea27..61597a074cf59 100644 --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -81,16 +81,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -120,7 +120,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -135,19 +135,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void 
@llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 @@ -458,9 +458,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp index 4d2b93ffd4712..a7db3da7d1f86 100644 --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -133,9 +133,9 @@ int main() // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 0 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i64 0 // CHECK-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 0 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 0 // CHECK-NEXT: store double 0.000000e+00, ptr [[E2]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 @@ -529,16 +529,16 @@ int main() // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY]], i64 2 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX1]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY2]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY2]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX4]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY5]], i64 5 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY5]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX6]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY7]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY7]], i64 1 // CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 // CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]] @@ -564,18 +564,18 @@ int main() // CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] // CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX9]], i64 
0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY10]], i64 2 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY10]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY12]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY12]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX14]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY15]], i64 5 +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw [10 x double], ptr [[ARRAYDECAY15]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX16]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY17]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw double, ptr [[ARRAYDECAY17]], i64 1 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP14]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP15]], align 8 @@ -852,7 +852,7 @@ int main() // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK2-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 @@ -930,10 +930,10 @@ int main() // CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 // CHECK2-NEXT: [[TMP47:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 0 +// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP48]], i32 0 // CHECK2-NEXT: [[TMP49:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 // CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP50]], i32 0 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP50]], i32 0 // CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: [[TMP52:%.*]] = mul nuw i32 [[TMP51]], 4 // CHECK2-NEXT: [[TMP53:%.*]] = sext i32 [[TMP52]] to i64 @@ -1007,7 +1007,7 @@ int main() // CHECK2-NEXT: [[TMP86:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP86]], ptr [[SIZE_CASTED21]], align 4 // CHECK2-NEXT: [[TMP87:%.*]] = load i32, ptr [[SIZE_CASTED21]], align 4 -// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 0 +// CHECK2-NEXT: 
[[ARRAYIDX22:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[A]], i32 0, i32 0 // CHECK2-NEXT: [[TMP88:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK2-NEXT: store i32 [[TMP87]], ptr [[TMP88]], align 4 // CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 @@ -1480,9 +1480,9 @@ int main() // CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 2 // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 // CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] @@ -1664,9 +1664,9 @@ int main() // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP7]], i32 2 // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 // CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]] @@ -1878,8 +1878,8 @@ int main() // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 1 // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2 // CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp index 1a2cf7aede321..5d749eeb81776 100644 --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -82,16 +82,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP6]], i64 9 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] @@ -121,7 +121,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -136,19 +136,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP28]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP32]], i64 0 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = sext i32 [[TMP33]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] 
= add nsw i64 -1, [[TMP34]] // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP35]], i64 9 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP36]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP37]], align 8 @@ -463,9 +463,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp index d912f801a33db..19a523ee165c8 100644 --- a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp +++ b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp @@ -54,12 +54,12 @@ int main() { // CHECK: [[SIZES:%.+]] = alloca [6 x i64], // CHECK: [[VLA_ADDR:%.+]] = alloca float, i64 %{{.+}}, // CHECK: [[PTR:%.+]] = load ptr, ptr [[PTR_ADDR]], -// CHECK-NEXT: [[ARR_IDX:%.+]] = getelementptr inbounds float, ptr [[PTR]], i64 3 +// CHECK-NEXT: [[ARR_IDX:%.+]] = getelementptr inbounds nuw float, ptr [[PTR]], i64 3 // CHECK: [[P5:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8 // CHECK-NEXT: [[ARR_IDX1:%.+]] = getelementptr inbounds float, ptr [[P5]], i64 0 // CHECK: [[P7:%.+]] = load ptr, ptr [[REF_ADDR]], // CHECK-NEXT: [[REF:%.+]] = load ptr, ptr [[REF_ADDR]], -// CHECK-NEXT: [[ARR_IDX2:%.+]] = getelementptr inbounds [4 x float], ptr [[ARR_ADDR]], i64 0, i64 0 +// CHECK-NEXT: [[ARR_IDX2:%.+]] = getelementptr inbounds nuw [4 x float], ptr [[ARR_ADDR]], i64 0, i64 0 // CHECK: [[P10:%.+]] = mul nuw i64 {{.+}}, 4 // CHECK-NEXT: [[ARR_IDX5:%.+]] = getelementptr inbounds float, ptr [[VLA_ADDR]], i64 0 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[SIZES]], ptr align 8 [[SIZES1]], i64 48, i1 false) @@ -132,14 +132,14 @@ int main() { // CHECK: [[SIZES:%.+]] = alloca [6 x i64], // CHECK: [[A_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS:%.+]], i32 0, i32 0 // CHECK: [[PTR_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 1 -// CHECK: [[ARR_IDX:%.+]] = getelementptr inbounds i32, ptr %{{.+}}, i64 3 +// CHECK: [[ARR_IDX:%.+]] = getelementptr inbounds nuw i32, ptr %{{.+}}, i64 3 // CHECK: [[REF_REF:%.+]] = 
getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 2 // CHECK: [[REF_PTR:%.+]] = load ptr, ptr [[REF_REF]], // CHECK-NEXT: [[P3:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 1 // CHECK: [[ARR_IDX5:%.+]] = getelementptr inbounds i32, ptr {{.+}}, i64 0 // CHECK: [[ARR_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 3 -// CHECK: [[ARR_IDX6:%.+]] = getelementptr inbounds [4 x i32], ptr [[ARR_ADDR]], i64 0, i64 0 +// CHECK: [[ARR_IDX6:%.+]] = getelementptr inbounds nuw [4 x i32], ptr [[ARR_ADDR]], i64 0, i64 0 // CHECK: [[A_ADDR2:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 0 // CHECK: [[P4:%.+]] = mul nuw i64 [[CONV:%.+]], 4 // CHECK: [[A_ADDR3:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 0 @@ -147,7 +147,7 @@ int main() { // CHECK: [[L6:%.+]] = sext i32 [[L5]] to i64 // CHECK: [[LB_ADD_LEN:%lb_add_len]] = add nsw i64 -1, [[L6]] // CHECK: [[ARR_ADDR9:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[THIS]], i32 0, i32 3 -// CHECK: [[ARR_IDX10:%arrayidx.+]] = getelementptr inbounds [4 x i32], ptr [[ARR_ADDR9]], i64 0, i64 %lb_add_len +// CHECK: [[ARR_IDX10:%arrayidx.+]] = getelementptr inbounds nuw [4 x i32], ptr [[ARR_ADDR9]], i64 0, i64 %lb_add_len // CHECK: [[ARR_END:%.+]] = getelementptr i32, ptr [[ARR_IDX10]], i32 1 // CHECK: [[E:%.+]] = ptrtoint ptr [[ARR_END]] to i64 // CHECK: [[B:%.+]] = ptrtoint ptr [[A_ADDR]] to i64 diff --git a/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp index c90819dc2a22f..6d1c0213d648c 100644 --- a/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp @@ -49,14 +49,14 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds double, ptr [[TT]], i32 1 + // CK1: getelementptr inbounds nuw double, ptr [[TT]], i32 1 #pragma omp target data map(g[:10]) use_device_ptr(g) { ++g; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE00]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds double, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw double, ptr [[TTT]], i32 1 ++g; // CK1: [[T1:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -67,26 +67,26 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds float, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TT1]], i32 1 #pragma omp target data map(l[:10]) use_device_ptr(l) { ++l; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE01]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 ++l; // CK1-NOT: call void @__tgt_target // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 #pragma omp target data map(l[:10]) use_device_ptr(l) if(0) { ++l; } // CK1-NOT: call void @__tgt_target // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 ++l; // CK1: [[T1:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -97,14 
+97,14 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds float, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TT1]], i32 1 #pragma omp target data map(l[:10]) use_device_ptr(l) if(1) { ++l; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE03]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 ++l; // CK1: [[CMP:%.+]] = icmp ne ptr %{{.+}}, null @@ -119,12 +119,12 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds float, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TT1]], i32 1 // CK1: br label %[[BEND:.+]] // CK1: [[BELSE]]: // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 // CK1: br label %[[BEND]] #pragma omp target data map(l[:10]) use_device_ptr(l) if(lr != 0) { @@ -142,7 +142,7 @@ void foo(float *&lr, T *&tr) { // CK1: [[BEND]]: // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 ++l; // CK1: [[T2:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -156,7 +156,7 @@ void foo(float *&lr, T *&tr) { // CK1: store ptr [[PVTV]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], // CK1: [[TT2:%.+]] = load ptr, ptr [[TT1]], - // CK1: getelementptr inbounds float, ptr [[TT2]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TT2]], i32 1 #pragma omp target data map(lr[:10]) use_device_ptr(lr) { ++lr; @@ -164,7 +164,7 @@ void foo(float *&lr, T *&tr) { // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE05]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], // CK1: [[TTTT:%.+]] = load ptr, ptr [[TTT]], - // CK1: getelementptr inbounds float, ptr [[TTTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTTT]], i32 1 ++lr; // CK1: [[T1:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -175,14 +175,14 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds i32, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TT1]], i32 1 #pragma omp target data map(t[:10]) use_device_ptr(t) { ++t; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE06]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds i32, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TTT]], i32 1 ++t; // CK1: [[T2:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -196,7 +196,7 @@ void foo(float *&lr, T *&tr) { // CK1: store ptr [[PVTV]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], // CK1: [[TT2:%.+]] = load ptr, ptr [[TT1]], - // CK1: getelementptr inbounds i32, ptr [[TT2]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TT2]], i32 1 #pragma omp target data map(tr[:10]) use_device_ptr(tr) { ++tr; @@ -204,7 +204,7 @@ void foo(float *&lr, T *&tr) { // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE07]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], // CK1: [[TTTT:%.+]] = load ptr, ptr [[TTT]], - // CK1: getelementptr inbounds 
i32, ptr [[TTTT]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TTTT]], i32 1 ++tr; // CK1: [[T1:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -215,14 +215,14 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds float, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TT1]], i32 1 #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) { ++l; ++t; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE08]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds float, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[TTT]], i32 1 ++l; ++t; @@ -232,18 +232,18 @@ void foo(float *&lr, T *&tr) { // CK1: [[VAL:%.+]] = load ptr, ptr {{%.+}}, // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[_TT1:%.+]] = load ptr, ptr [[_PVT]], - // CK1: getelementptr inbounds float, ptr [[_TT1]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[_TT1]], i32 1 // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds i32, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TT1]], i32 1 #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l) use_device_ptr(t) { ++l; ++t; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE09]] // CK1: [[_TTT:%.+]] = load ptr, ptr {{%.+}}, - // CK1: getelementptr inbounds float, ptr [[_TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[_TTT]], i32 1 // CK1: [[TTT:%.+]] = load ptr, ptr {{%.+}}, - // CK1: getelementptr inbounds i32, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TTT]], i32 1 ++l; ++t; // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE10]] @@ -252,18 +252,18 @@ void foo(float *&lr, T *&tr) { // CK1: [[VAL:%.+]] = load ptr, ptr {{%.+}}, // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[_TT1:%.+]] = load ptr, ptr [[_PVT]], - // CK1: getelementptr inbounds float, ptr [[_TT1]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[_TT1]], i32 1 // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds i32, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TT1]], i32 1 #pragma omp target data map(l[:10], t[:10]) use_device_ptr(l,t) { ++l; ++t; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE10]] // CK1: [[_TTT:%.+]] = load ptr, ptr {{%.+}}, - // CK1: getelementptr inbounds float, ptr [[_TTT]], i32 1 + // CK1: getelementptr inbounds nuw float, ptr [[_TTT]], i32 1 // CK1: [[TTT:%.+]] = load ptr, ptr {{%.+}}, - // CK1: getelementptr inbounds i32, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TTT]], i32 1 ++l; ++t; // CK1: [[T1:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -274,14 +274,14 @@ void foo(float *&lr, T *&tr) { // CK1-NOT: store ptr [[VAL]], ptr [[DECL]], // CK1: store ptr [[VAL]], ptr [[PVT:%.+]], // CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], - // CK1: getelementptr inbounds i32, ptr [[TT1]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TT1]], i32 1 #pragma omp target data map(l[:10]) use_device_ptr(t) { ++l; ++t; } // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE11]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK1: getelementptr inbounds i32, ptr [[TTT]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TTT]], i32 1 ++l; ++t; // CK1: [[T2:%.+]] = load ptr, ptr [[DECL:%.+]], @@ -295,7 +295,7 @@ void foo(float *&lr, T *&tr) { // CK1: store ptr [[PVTV]], ptr [[PVT:%.+]], // 
CK1: [[TT1:%.+]] = load ptr, ptr [[PVT]], // CK1: [[TT2:%.+]] = load ptr, ptr [[TT1]], - // CK1: getelementptr inbounds i32, ptr [[TT2]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TT2]], i32 1 #pragma omp target data map(l[:10]) use_device_ptr(tr) { ++l; ++tr; @@ -303,7 +303,7 @@ void foo(float *&lr, T *&tr) { // CK1: call void @__tgt_target_data_end{{.+}}[[MTYPE12]] // CK1: [[TTT:%.+]] = load ptr, ptr [[DECL]], // CK1: [[TTTT:%.+]] = load ptr, ptr [[TTT]], - // CK1: getelementptr inbounds i32, ptr [[TTTT]], i32 1 + // CK1: getelementptr inbounds nuw i32, ptr [[TTTT]], i32 1 ++l; ++tr; } @@ -354,7 +354,7 @@ struct ST { // CK2: store ptr [[PVT]], ptr [[PVT2:%.+]], // CK2: [[TT1:%.+]] = load ptr, ptr [[PVT2]], // CK2: [[TT2:%.+]] = load ptr, ptr [[TT1]], - // CK2: getelementptr inbounds double, ptr [[TT2]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TT2]], i32 1 #pragma omp target data map(a[:10]) use_device_ptr(a) { a++; @@ -362,7 +362,7 @@ struct ST { // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE00]] // CK2: [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 0 // CK2: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK2: getelementptr inbounds double, ptr [[TTT]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TTT]], i32 1 a++; // CK2: [[BP:%.+]] = getelementptr inbounds [2 x ptr], ptr %{{.+}}, i32 0, i32 1 @@ -373,7 +373,7 @@ struct ST { // CK2: store ptr [[PVT]], ptr [[PVT2:%.+]], // CK2: [[TT1:%.+]] = load ptr, ptr [[PVT2]], // CK2: [[TT2:%.+]] = load ptr, ptr [[TT1]], - // CK2: getelementptr inbounds double, ptr [[TT2]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TT2]], i32 1 #pragma omp target data map(b[:10]) use_device_ptr(b) { b++; @@ -382,7 +382,7 @@ struct ST { // CK2: [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %{{.+}}, i32 0, i32 1 // CK2: [[TTT:%.+]] = load ptr, ptr [[DECL]], // CK2: [[TTTT:%.+]] = load ptr, ptr [[TTT]], - // CK2: getelementptr inbounds double, ptr [[TTTT]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TTTT]], i32 1 b++; // CK2: [[BP:%.+]] = getelementptr inbounds [3 x ptr], ptr %{{.+}}, i32 0, i32 2 @@ -393,7 +393,7 @@ struct ST { // CK2: store ptr [[PVT]], ptr [[PVT2:%.+]], // CK2: [[TT1:%.+]] = load ptr, ptr [[PVT2]], // CK2: [[TT2:%.+]] = load ptr, ptr [[TT1]], - // CK2: getelementptr inbounds double, ptr [[TT2]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TT2]], i32 1 #pragma omp target data map(la[:10]) use_device_ptr(a) { a++; @@ -402,7 +402,7 @@ struct ST { // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE02]] // CK2: [[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 0 // CK2: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK2: getelementptr inbounds double, ptr [[TTT]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TTT]], i32 1 a++; la++; @@ -419,10 +419,10 @@ struct ST { // CK2: store ptr [[PVT1]], ptr [[_PVT1:%.+]], // CK2: [[TT2:%.+]] = load ptr, ptr [[_PVT2]], // CK2: [[_TT2:%.+]] = load ptr, ptr [[TT2]], - // CK2: getelementptr inbounds double, ptr [[_TT2]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[_TT2]], i32 1 // CK2: [[TT1:%.+]] = load ptr, ptr [[_PVT1]], // CK2: [[_TT1:%.+]] = load ptr, ptr [[TT1]], - // CK2: getelementptr inbounds double, ptr [[_TT1]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[_TT1]], i32 1 #pragma omp target data map(b[:10]) use_device_ptr(a, b) { a++; @@ -431,11 +431,11 @@ struct ST { // CK2: call void @__tgt_target_data_end{{.+}}[[MTYPE03]] // CK2: 
[[DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 0 // CK2: [[TTT:%.+]] = load ptr, ptr [[DECL]], - // CK2: getelementptr inbounds double, ptr [[TTT]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[TTT]], i32 1 // CK2: [[_DECL:%.+]] = getelementptr inbounds nuw [[ST]], ptr %this1, i32 0, i32 1 // CK2: [[_TTT:%.+]] = load ptr, ptr [[_DECL]], // CK2: [[_TTTT:%.+]] = load ptr, ptr [[_TTT]], - // CK2: getelementptr inbounds double, ptr [[_TTTT]], i32 1 + // CK2: getelementptr inbounds nuw double, ptr [[_TTTT]], i32 1 a++; b++; } diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp index 08bcc87ca5f0a..39eaedb0e48d1 100644 --- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp @@ -586,7 +586,7 @@ void use_template() { // CHECK-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CHECK-NEXT: ret void // @@ -601,7 +601,7 @@ void use_template() { // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 // CHECK-NEXT: ret void // @@ -1079,7 +1079,7 @@ void use_template() { // CHECK-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CHECK-NEXT: ret void // @@ -1094,7 +1094,7 @@ void use_template() { // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 // CHECK-NEXT: ret void // @@ -1133,7 +1133,7 @@ void use_template() { // CHECK-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CHECK-NEXT: ret void // @@ -1148,7 +1148,7 @@ void use_template() { // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK-NEXT: [[INCDEC_PTR:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP2]], i32 1 +// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i32 1 // CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 // CHECK-NEXT: ret void // @@ -1422,14 +1422,14 @@ void use_template() { // SIMD-ONLY0-NEXT: store ptr [[K]], ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: store ptr [[AA]], ptr [[RAA]], align 8 // SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[K]], align 8 -// SIMD-ONLY0-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// SIMD-ONLY0-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // SIMD-ONLY0-NEXT: store ptr [[INCDEC_PTR]], ptr [[K]], align 8 // SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 // SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// SIMD-ONLY0-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +// SIMD-ONLY0-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1 // SIMD-ONLY0-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP4]], align 8 // SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[AA]], i64 0, i64 0 // SIMD-ONLY0-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 @@ -1478,14 +1478,14 @@ void use_template() { // SIMD-ONLY0-NEXT: store ptr [[TMP0]], ptr [[K]], align 8 // SIMD-ONLY0-NEXT: store ptr [[K]], ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[K]], align 8 -// SIMD-ONLY0-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// SIMD-ONLY0-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // SIMD-ONLY0-NEXT: store ptr [[INCDEC_PTR]], ptr [[K]], align 8 // SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 // SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// SIMD-ONLY0-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +// SIMD-ONLY0-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1 // SIMD-ONLY0-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP4]], align 8 // SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[AA]], i64 0, i64 0 // SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -1520,14 +1520,14 @@ void use_template() { // SIMD-ONLY0-NEXT: store ptr [[TMP0]], ptr [[K]], align 8 // SIMD-ONLY0-NEXT: store ptr [[K]], ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[K]], align 8 -// SIMD-ONLY0-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// SIMD-ONLY0-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i32 1 // SIMD-ONLY0-NEXT: store ptr [[INCDEC_PTR]], ptr [[K]], align 8 // SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 // SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Z]], align 8 // SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// SIMD-ONLY0-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 1 +// 
SIMD-ONLY0-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i32 1 // SIMD-ONLY0-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP4]], align 8 // SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[AA]], i64 0, i64 0 // SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 diff --git a/clang/test/OpenMP/target_in_reduction_codegen.cpp b/clang/test/OpenMP/target_in_reduction_codegen.cpp index fb715e2de2a59..56191ee575136 100644 --- a/clang/test/OpenMP/target_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_in_reduction_codegen.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -85,7 +85,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP12]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[B]], ptr [[TMP14]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1 @@ -100,7 +100,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP22]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1 @@ -118,7 +118,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]]) // CHECK1-NEXT: store ptr [[TMP35]], ptr [[DOTTASK_RED_]], align 
8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C]], ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 @@ -133,7 +133,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..6, ptr [[TMP43]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP44]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP46]], align 8 // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 diff --git a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp index 3a1c168533c37..505c34e21733c 100644 --- a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp @@ -2142,7 +2142,7 @@ void bar() { // CK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 // CK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 // CK10-NEXT: ret void // @@ -2153,7 +2153,7 @@ void bar() { // CK10-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CK10-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 // CK10-NEXT: ret void // @@ -2164,7 +2164,7 @@ void bar() { // CK10-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CK10-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 // CK10-NEXT: ret void // @@ -2178,7 +2178,7 @@ void bar() { // CK10-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK10-NEXT: 
[[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK10-NEXT: ret void // @@ -2192,7 +2192,7 @@ void bar() { // CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK10-NEXT: ret void // @@ -2206,7 +2206,7 @@ void bar() { // CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK10-NEXT: ret void // @@ -2224,11 +2224,11 @@ void bar() { // CK10-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 // CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CK10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1 // CK10-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 // CK10-NEXT: ret void // @@ -2613,7 +2613,7 @@ void bar() { // CK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 // CK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 // CK11-NEXT: ret void // @@ -2624,7 +2624,7 @@ void bar() { // CK11-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CK11-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 // CK11-NEXT: ret void // @@ -2635,7 +2635,7 @@ void bar() { // CK11-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CK11-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 // CK11-NEXT: ret void // @@ -2649,7 +2649,7 @@ void bar() { // CK11-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 // CK11-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK11-NEXT: ret void // @@ -2663,7 +2663,7 @@ void bar() { // CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 // CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK11-NEXT: ret void // @@ -2677,7 +2677,7 @@ void bar() { // CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 // CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK11-NEXT: ret void // @@ -2695,11 +2695,11 @@ void bar() { // CK11-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 // CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 // CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 // CK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CK11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1 // CK11-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 // CK11-NEXT: ret void // @@ -3084,7 +3084,7 @@ void bar() { // CK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4 // CK12-NEXT: ret void // @@ -3095,7 +3095,7 @@ void bar() { // CK12-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4 // CK12-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4 // CK12-NEXT: ret void // @@ -3106,7 +3106,7 @@ void bar() { // CK12-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4 // CK12-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4 // 
CK12-NEXT: ret void // @@ -3120,7 +3120,7 @@ void bar() { // CK12-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK12-NEXT: ret void // @@ -3134,7 +3134,7 @@ void bar() { // CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK12-NEXT: ret void // @@ -3148,7 +3148,7 @@ void bar() { // CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK12-NEXT: ret void // @@ -3166,11 +3166,11 @@ void bar() { // CK12-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4 // CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4 // CK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CK12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1 // CK12-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4 // CK12-NEXT: ret void // @@ -3555,7 +3555,7 @@ void bar() { // CK13-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CK13-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4 // CK13-NEXT: ret void // @@ -3566,7 +3566,7 @@ void bar() { // CK13-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4 // CK13-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4 // CK13-NEXT: ret void // @@ -3577,7 +3577,7 @@ void bar() { // CK13-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4 // CK13-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] 
= getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4 // CK13-NEXT: ret void // @@ -3591,7 +3591,7 @@ void bar() { // CK13-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK13-NEXT: ret void // @@ -3605,7 +3605,7 @@ void bar() { // CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK13-NEXT: ret void // @@ -3619,7 +3619,7 @@ void bar() { // CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK13-NEXT: ret void // @@ -3637,11 +3637,11 @@ void bar() { // CK13-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4 // CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 // CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 // CK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4 // CK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CK13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 1 // CK13-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4 // CK13-NEXT: ret void // @@ -3674,34 +3674,34 @@ void bar() { // SIMD-ONLY00-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 // SIMD-ONLY00-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 // SIMD-ONLY00-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 8 // SIMD-ONLY00-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 8 // SIMD-ONLY00-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 8 // 
SIMD-ONLY00-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 // SIMD-ONLY00-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // SIMD-ONLY00-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 // SIMD-ONLY00-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 // SIMD-ONLY00-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8 // SIMD-ONLY00-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY00-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 8 // SIMD-ONLY00-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY00-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 // SIMD-ONLY00-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8 // SIMD-ONLY00-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY00-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8 // SIMD-ONLY00-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY00-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 // SIMD-ONLY00-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8 // SIMD-ONLY00-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY00-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 8 @@ -3711,11 +3711,11 @@ void bar() { // SIMD-ONLY00-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 // SIMD-ONLY00-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8 // SIMD-ONLY00-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8 // SIMD-ONLY00-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8 // SIMD-ONLY00-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// SIMD-ONLY00-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1 // SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8 // SIMD-ONLY00-NEXT: ret void // @@ -3748,34 +3748,34 @@ void bar() { // SIMD-ONLY01-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 // SIMD-ONLY01-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 // SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 8 // SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// 
SIMD-ONLY01-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 8 // SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 8 // SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 // SIMD-ONLY01-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // SIMD-ONLY01-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 // SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 // SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8 // SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY01-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 8 // SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 // SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8 // SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY01-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8 // SIMD-ONLY01-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY01-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 // SIMD-ONLY01-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8 // SIMD-ONLY01-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 // SIMD-ONLY01-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 8 @@ -3785,11 +3785,11 @@ void bar() { // SIMD-ONLY01-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 // SIMD-ONLY01-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8 // SIMD-ONLY01-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8 // SIMD-ONLY01-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8 // SIMD-ONLY01-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// SIMD-ONLY01-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1 // SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8 // SIMD-ONLY01-NEXT: ret void // @@ -3822,34 +3822,34 @@ void bar() { // SIMD-ONLY02-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 // SIMD-ONLY02-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 // 
SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 4 // SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 4 // SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 4 // SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 // SIMD-ONLY02-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 // SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 // SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 // SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4 // SIMD-ONLY02-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY02-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 4 // SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 4 // SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4 // SIMD-ONLY02-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY02-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 4 // SIMD-ONLY02-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY02-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4 // SIMD-ONLY02-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4 // SIMD-ONLY02-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY02-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 4 @@ -3859,11 +3859,11 @@ void bar() { // SIMD-ONLY02-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 // SIMD-ONLY02-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4 // SIMD-ONLY02-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 4 // SIMD-ONLY02-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4 // SIMD-ONLY02-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], 
align 4 -// SIMD-ONLY02-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1 // SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4 // SIMD-ONLY02-NEXT: ret void // @@ -3896,34 +3896,34 @@ void bar() { // SIMD-ONLY03-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 // SIMD-ONLY03-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 // SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 4 // SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP1]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 4 // SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 4 // SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 // SIMD-ONLY03-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 // SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 // SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 // SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4 // SIMD-ONLY03-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY03-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 4 // SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 4 // SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4 // SIMD-ONLY03-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY03-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 4 // SIMD-ONLY03-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY03-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4 // SIMD-ONLY03-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP14]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4 // SIMD-ONLY03-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 // SIMD-ONLY03-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 4 @@ -3933,11 +3933,11 @@ void bar() { // SIMD-ONLY03-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 // SIMD-ONLY03-NEXT: [[TMP19:%.*]] = load ptr, ptr 
[[_TMP8]], align 4 // SIMD-ONLY03-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 4 // SIMD-ONLY03-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4 // SIMD-ONLY03-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 -// SIMD-ONLY03-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP22]], i32 1 // SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4 // SIMD-ONLY03-NEXT: ret void // @@ -3951,7 +3951,7 @@ void bar() { // CK20-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // CK20-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 // CK20-NEXT: ret void // @@ -4185,7 +4185,7 @@ void bar() { // CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CK20-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 // CK20-NEXT: ret void // @@ -4199,7 +4199,7 @@ void bar() { // CK20-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 // CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 // CK20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 // CK20-NEXT: ret void // @@ -4212,12 +4212,12 @@ void bar() { // CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CK20-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 // CK20-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 // CK20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 // CK20-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CK20-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK20-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // CK20-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 // CK20-NEXT: ret void // @@ -4231,7 +4231,7 @@ void bar() { // CK21-NEXT: call 
void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // CK21-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 // CK21-NEXT: ret void // @@ -4465,7 +4465,7 @@ void bar() { // CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CK21-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 // CK21-NEXT: ret void // @@ -4479,7 +4479,7 @@ void bar() { // CK21-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 // CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 // CK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 // CK21-NEXT: ret void // @@ -4492,12 +4492,12 @@ void bar() { // CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CK21-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 // CK21-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 // CK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 // CK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CK21-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK21-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // CK21-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 // CK21-NEXT: ret void // @@ -4511,7 +4511,7 @@ void bar() { // CK22-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // CK22-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 -// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 // CK22-NEXT: ret void // @@ -4745,7 +4745,7 @@ void bar() { // CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CK22-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK22-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A]], align 4 -// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 // CK22-NEXT: ret void // @@ -4759,7 +4759,7 @@ void bar() { // CK22-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 // CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 // CK22-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4 // CK22-NEXT: ret void // @@ -4772,12 +4772,12 @@ void bar() { // CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CK22-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 -// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 // CK22-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 // CK22-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4 // CK22-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CK22-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK22-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // CK22-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4 // CK22-NEXT: ret void // @@ -4791,7 +4791,7 @@ void bar() { // CK23-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // CK23-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 -// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 // CK23-NEXT: ret void // @@ -5025,7 +5025,7 @@ void bar() { // CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CK23-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 -// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 // CK23-NEXT: ret void // @@ -5039,7 +5039,7 @@ void bar() { // CK23-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 // CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 // CK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4 // CK23-NEXT: ret void // @@ -5052,12 +5052,12 @@ 
void bar() { // CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CK23-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 // CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 -// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i32 1 // CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 // CK23-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 // CK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4 // CK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CK23-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK23-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // CK23-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4 // CK23-NEXT: ret void // @@ -5071,7 +5071,7 @@ void bar() { // SIMD-ONLY10-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // SIMD-ONLY10-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 // SIMD-ONLY10-NEXT: ret void // @@ -5101,21 +5101,21 @@ void bar() { // SIMD-ONLY10-NEXT: store ptr null, ptr [[LA]], align 8 // SIMD-ONLY10-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 8 -// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 // SIMD-ONLY10-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 // SIMD-ONLY10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// SIMD-ONLY10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8 // SIMD-ONLY10-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8 -// SIMD-ONLY10-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8 // SIMD-ONLY10-NEXT: [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8 // SIMD-ONLY10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// SIMD-ONLY10-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw 
double, ptr [[TMP5]], i32 1 // SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8 // SIMD-ONLY10-NEXT: ret void // @@ -5145,7 +5145,7 @@ void bar() { // SIMD-ONLY11-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // SIMD-ONLY11-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) // SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 // SIMD-ONLY11-NEXT: ret void // @@ -5175,21 +5175,21 @@ void bar() { // SIMD-ONLY11-NEXT: store ptr null, ptr [[LA]], align 8 // SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 8 -// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 // SIMD-ONLY11-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 // SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// SIMD-ONLY11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8 // SIMD-ONLY11-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8 -// SIMD-ONLY11-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8 // SIMD-ONLY11-NEXT: [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8 // SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// SIMD-ONLY11-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1 // SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8 // SIMD-ONLY11-NEXT: ret void // @@ -5219,7 +5219,7 @@ void bar() { // SIMD-ONLY12-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // SIMD-ONLY12-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 -// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 
4 // SIMD-ONLY12-NEXT: ret void // @@ -5249,21 +5249,21 @@ void bar() { // SIMD-ONLY12-NEXT: store ptr null, ptr [[LA]], align 4 // SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 4 -// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 // SIMD-ONLY12-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 // SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// SIMD-ONLY12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4 // SIMD-ONLY12-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4 -// SIMD-ONLY12-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4 // SIMD-ONLY12-NEXT: [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4 // SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// SIMD-ONLY12-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1 // SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4 // SIMD-ONLY12-NEXT: ret void // @@ -5293,7 +5293,7 @@ void bar() { // SIMD-ONLY13-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // SIMD-ONLY13-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) // SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 -// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 // SIMD-ONLY13-NEXT: ret void // @@ -5323,21 +5323,21 @@ void bar() { // SIMD-ONLY13-NEXT: store ptr null, ptr [[LA]], align 4 // SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 4 -// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i32 1 // SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 // SIMD-ONLY13-NEXT: [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 // SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// SIMD-ONLY13-NEXT: 
[[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 1 // SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4 // SIMD-ONLY13-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 // SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4 -// SIMD-ONLY13-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP3]], i32 1 // SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4 // SIMD-ONLY13-NEXT: [[B5:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 // SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4 // SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// SIMD-ONLY13-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 1 // SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4 // SIMD-ONLY13-NEXT: ret void // diff --git a/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp b/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp index fcaceac7d3467..87fa7fe462daa 100644 --- a/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp +++ b/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp @@ -45,7 +45,7 @@ void foo() { // CHECK-NEXT: store ptr [[CALL]], ptr [[PTR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 0 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: store ptr [[PTR]], ptr [[TMP2]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_map_codegen_01.cpp b/clang/test/OpenMP/target_map_codegen_01.cpp index d112500eb5fdd..9f3553d2377cb 100644 --- a/clang/test/OpenMP/target_map_codegen_01.cpp +++ b/clang/test/OpenMP/target_map_codegen_01.cpp @@ -108,6 +108,6 @@ void implicit_maps_reference (int a, int *b){ // CK2: store ptr [[ADDR]], ptr [[REF]], // CK2: [[T:%.+]] = load ptr, ptr [[REF]], // CK2: [[TT:%.+]] = load ptr, ptr [[T]], -// CK2: getelementptr inbounds i32, ptr [[TT]], i32 1 +// CK2: getelementptr inbounds nuw i32, ptr [[TT]], i32 1 #endif // CK2 #endif diff --git a/clang/test/OpenMP/target_map_codegen_21.cpp b/clang/test/OpenMP/target_map_codegen_21.cpp index a1419b7d4beb8..f5c517692d8c8 100644 --- a/clang/test/OpenMP/target_map_codegen_21.cpp +++ b/clang/test/OpenMP/target_map_codegen_21.cpp @@ -185,7 +185,7 @@ int explicit_maps_globals(void){ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: store ptr @c, ptr [[BP0]] -// CK22-DAG: store ptr getelementptr inbounds ([100 x i32], ptr @c, i{{.+}} 0, i{{.+}} 1), ptr [[P0]] +// CK22-DAG: store ptr getelementptr inbounds nuw ([100 x i32], ptr @c, i{{.+}} 0, i{{.+}} 1), ptr [[P0]] // CK22: call void [[CALL03:@.+]](ptr {{[^,]+}}) #pragma omp target map(c [1:4]) @@ -277,7 +277,7 
+277,7
@@ int explicit_maps_globals(void){ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: store ptr @sc, ptr [[BP0]] -// CK22-DAG: store ptr getelementptr inbounds ([100 x [[ST]]], ptr @sc, i{{.+}} 0, i{{.+}} 1), ptr [[P0]] +// CK22-DAG: store ptr getelementptr inbounds nuw ([100 x [[ST]]], ptr @sc, i{{.+}} 0, i{{.+}} 1), ptr [[P0]] // CK22: call void [[CALL08:@.+]](ptr {{[^,]+}}) #pragma omp target map(sc [1:4]) @@ -369,7 +369,7 @@ int explicit_maps_globals(void){ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: store ptr @stc, ptr [[BP0]] -// CK22-DAG: store ptr getelementptr inbounds ([100 x [[STT]]], ptr @stc, i{{.+}} 0, i{{.+}} 1), ptr [[P0]] +// CK22-DAG: store ptr getelementptr inbounds nuw ([100 x [[STT]]], ptr @stc, i{{.+}} 0, i{{.+}} 1), ptr [[P0]] // CK22: call void [[CALL13:@.+]](ptr {{[^,]+}}) #pragma omp target map(stc [1:4]) diff --git a/clang/test/OpenMP/target_map_codegen_27.cpp b/clang/test/OpenMP/target_map_codegen_27.cpp index fe7ae12e00d13..bfe75bca481be 100644 --- a/clang/test/OpenMP/target_map_codegen_27.cpp +++ b/clang/test/OpenMP/target_map_codegen_27.cpp @@ -82,7 +82,7 @@ void explicit_maps_pointer_references (int *p){ // CK28-DAG: store ptr [[VAR1:%.+]], ptr [[P0]] // CK28-DAG: [[VAR0]] = load ptr, ptr [[VAR00:%.+]], // CK28-DAG: [[VAR00]] = load ptr, ptr [[VAR000:%.+]], -// CK28-DAG: [[VAR1]] = getelementptr inbounds i32, ptr [[VAR11:%.+]], i{{64|32}} 2 +// CK28-DAG: [[VAR1]] = getelementptr inbounds nuw i32, ptr [[VAR11:%.+]], i{{64|32}} 2 // CK28-DAG: [[VAR11]] = load ptr, ptr [[VAR111:%.+]], // CK28-DAG: [[VAR111]] = load ptr, ptr [[VAR1111:%.+]], diff --git a/clang/test/OpenMP/target_map_codegen_28.cpp b/clang/test/OpenMP/target_map_codegen_28.cpp index e92f7e4773ecf..67ea72d791d03 100644 --- a/clang/test/OpenMP/target_map_codegen_28.cpp +++ b/clang/test/OpenMP/target_map_codegen_28.cpp @@ -89,7 +89,7 @@ struct SSB{ // CK29-DAG: store ptr [[VAR1:%.+]], ptr [[BP2]] // CK29-DAG: store ptr [[VAR2:%.+]], ptr [[P2]] // CK29-DAG: [[VAR1]] = getelementptr inbounds nuw [[SSA]], ptr %{{.+}}, i32 0, i32 1 -// CK29-DAG: [[VAR2]] = getelementptr inbounds double, ptr [[VAR22:%.+]], i{{.+}} 0 +// CK29-DAG: [[VAR2]] = getelementptr inbounds nuw double, ptr [[VAR22:%.+]], i{{.+}} 0 // CK29-DAG: [[VAR22]] = load ptr, ptr %{{.+}}, // CK29: call void [[CALL00:@.+]](ptr {{[^,]+}}) @@ -129,7 +129,7 @@ struct SSB{ // CK29-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 // CK29-DAG: store ptr [[VAR1]], ptr [[BP2]] // CK29-DAG: store ptr [[VAR2:%.+]], ptr [[P2]] -// CK29-DAG: [[VAR2]] = getelementptr inbounds double, ptr [[VAR22:%.+]], i{{.+}} 0 +// CK29-DAG: [[VAR2]] = getelementptr inbounds nuw double, ptr [[VAR22:%.+]], i{{.+}} 0 // CK29-DAG: [[VAR22]] = load ptr, ptr %{{.+}}, // CK29: call void [[CALL00:@.+]](ptr {{[^,]+}}) @@ -164,7 +164,7 @@ struct SSB{ // CK29-DAG: store ptr [[VAR1:%.+]], ptr [[BP2]] // CK29-DAG: store ptr [[VAR2:%.+]], ptr [[P2]] // CK29-DAG: [[VAR1]] = getelementptr inbounds nuw [[SSA]], ptr %{{.+}}, i32 0, i32 1 -// CK29-DAG: [[VAR2]] = getelementptr inbounds double, ptr [[VAR22:%.+]], i{{.+}} 0 +// CK29-DAG: [[VAR2]] = getelementptr inbounds nuw double, ptr [[VAR22:%.+]], i{{.+}} 0 // CK29-DAG: [[VAR22]] = load ptr, ptr %{{.+}}, // CK29: call void [[CALL00:@.+]](ptr 
{{[^,]+}}) diff --git a/clang/test/OpenMP/target_map_codegen_29.cpp b/clang/test/OpenMP/target_map_codegen_29.cpp index 936a01573c2d2..3ca7b228d26c2 100644 --- a/clang/test/OpenMP/target_map_codegen_29.cpp +++ b/clang/test/OpenMP/target_map_codegen_29.cpp @@ -89,7 +89,7 @@ typedef struct StructWithPtrTag : public Base { // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x ptr], ptr [[PTRS]], i32 0, i32 2 // CK30-DAG: store ptr [[S_PTR1_BEGIN:%.+]], ptr [[PTR]], // CK30-DAG: [[S_PTR1]] = getelementptr inbounds nuw [[STRUCT]], ptr [[S]], i32 0, i32 4 -// CK30-DAG: [[S_PTR1_BEGIN]] = getelementptr inbounds i32, ptr [[S_PTR1_BEGIN_REF:%.+]], i{{64|32}} 0 +// CK30-DAG: [[S_PTR1_BEGIN]] = getelementptr inbounds nuw i32, ptr [[S_PTR1_BEGIN_REF:%.+]], i{{64|32}} 0 // CK30-DAG: [[S_PTR1_BEGIN_REF]] = load ptr, ptr [[S_PTR1:%.+]], // CK30-DAG: [[S_PTR1]] = getelementptr inbounds nuw [[STRUCT]], ptr [[S]], i32 0, i32 4 @@ -98,7 +98,7 @@ typedef struct StructWithPtrTag : public Base { // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x ptr], ptr [[PTRS]], i32 0, i32 3 // CK30-DAG: store ptr [[S_PTRBASE1_BEGIN:%.+]], ptr [[PTR]], // CK30-DAG: [[S_PTRBASE1]] = getelementptr inbounds nuw [[BASE]], ptr [[S_BASE:%.+]], i32 0, i32 2 -// CK30-DAG: [[S_PTRBASE1_BEGIN]] = getelementptr inbounds i32, ptr [[S_PTRBASE1_BEGIN_REF:%.+]], i{{64|32}} 0 +// CK30-DAG: [[S_PTRBASE1_BEGIN]] = getelementptr inbounds nuw i32, ptr [[S_PTRBASE1_BEGIN_REF:%.+]], i{{64|32}} 0 // CK30-DAG: [[S_PTRBASE1_BEGIN_REF]] = load ptr, ptr [[S_PTRBASE1:%.+]], // CK30-DAG: [[S_PTRBASE1]] = getelementptr inbounds nuw [[BASE]], ptr [[S_BASE:%.+]], i32 0, i32 2 void map_with_deep_copy() { diff --git a/clang/test/OpenMP/target_map_deref_array_codegen.cpp b/clang/test/OpenMP/target_map_deref_array_codegen.cpp index 9d395b0ab8cd8..e61fc7296332b 100644 --- a/clang/test/OpenMP/target_map_deref_array_codegen.cpp +++ b/clang/test/OpenMP/target_map_deref_array_codegen.cpp @@ -75,7 +75,7 @@ void foo(int **t1d) // CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[T1D_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[T1D_ADDR]], align 8 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 0 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP10]], i64 0 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp index 7a0da002fb944..692e3a4214c9d 100644 --- a/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp +++ b/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp @@ -28,12 +28,12 @@ struct maptest { // CHECK: getelementptr inbounds // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS:%.+]], i32 0, i32 0 // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0 - // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0 + // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0 // SZ = &this->s.data[6]-&this->s.data[0] // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw 
%struct.maptest, ptr [[THIS]], i32 0, i32 0 // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0 - // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5 + // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5 // CHECK: [[S_DATA_6_ADDR:%.+]] = getelementptr float, ptr [[S_DATA_5_ADDR]], i32 1 // CHECK: [[END_BC:%.+]] = ptrtoint ptr [[S_DATA_6_ADDR]] to i64 // CHECK: [[BEG_BC:%.+]] = ptrtoint ptr [[S_DATA_0_ADDR]] to i64 @@ -64,12 +64,12 @@ struct maptest { // CHECK: [[SIZE:%.+]] = alloca [2 x i64], // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS:%.+]], i32 0, i32 0 // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0 - // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0 + // CHECK: [[S_DATA_0_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 0 // SZ = &this->s.data[6]-&this->s.data[0] // CHECK: [[S_ADDR:%.+]] = getelementptr inbounds nuw %struct.maptest, ptr [[THIS]], i32 0, i32 0 // CHECK: [[S_DATA_ADDR:%.+]] = getelementptr inbounds nuw %struct.S, ptr [[S_ADDR]], i32 0, i32 0 - // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5 + // CHECK: [[S_DATA_5_ADDR:%.+]] = getelementptr inbounds nuw [6 x float], ptr [[S_DATA_ADDR]], i64 0, i64 5 // CHECK: [[S_DATA_6_ADDR:%.+]] = getelementptr float, ptr [[S_DATA_5_ADDR]], i32 1 // CHECK: [[END_BC:%.+]] = ptrtoint ptr [[S_DATA_6_ADDR]] to i64 // CHECK: [[BEG_BC:%.+]] = ptrtoint ptr [[S_DATA_0_ADDR]] to i64 diff --git a/clang/test/OpenMP/target_map_member_expr_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_codegen.cpp index 9b64647928a24..fb36ba7b78d5b 100644 --- a/clang/test/OpenMP/target_map_member_expr_codegen.cpp +++ b/clang/test/OpenMP/target_map_member_expr_codegen.cpp @@ -223,7 +223,7 @@ void foo() { // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[TMP10]], i32 0, i32 0 // CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A4]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 0 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 0 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ASIZE]], align 4 // CHECK-NEXT: [[CONV:%.*]] = zext i32 [[TMP12]] to i64 // CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[CONV]], 4 @@ -233,7 +233,7 @@ void foo() { // CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[C5:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[TMP16]], i32 0, i32 1 // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[C5]], align 8 -// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 0 +// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[TMP17]], i64 0 // CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSIZE]], align 4 // CHECK-NEXT: [[CONV7:%.*]] = zext i32 [[TMP18]] to i64 // CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[CONV7]], 4 @@ -343,7 +343,7 @@ void foo() { // CHECK-NEXT: [[TMP79:%.*]] = load ptr, ptr [[_TMP12]], align 8 // CHECK-NEXT: [[C15:%.*]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], ptr [[TMP79]], i32 0, i32 1 // CHECK-NEXT: [[TMP80:%.*]] = load ptr, ptr [[C15]], align 8 -// CHECK-NEXT: 
[[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 0 +// CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP80]], i64 0 // CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[CSIZE]], align 4 // CHECK-NEXT: [[CONV17:%.*]] = zext i32 [[TMP81]] to i64 // CHECK-NEXT: [[TMP82:%.*]] = mul nuw i64 [[CONV17]], 4 diff --git a/clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp b/clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp index ffb145d8e50fc..775f0b296b1b6 100644 --- a/clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp +++ b/clang/test/OpenMP/target_map_nest_defalut_mapper_codegen.cpp @@ -45,7 +45,7 @@ void foo() { // CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_D]], ptr [[ARRAYIDX1]], i32 0, i32 1 // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_C:%.*]], ptr [[F]], i32 0, i32 0 // CHECK-NEXT: store i32 222, ptr [[A]], align 4 -// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x %struct.D], ptr [[SA]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [10 x %struct.D], ptr [[SA]], i64 0, i64 0 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: store ptr [[SA]], ptr [[TMP0]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp index 3d0710acf0ee7..5cce677e88572 100644 --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -96,16 +96,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -135,7 +135,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = 
getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -150,19 +150,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8 @@ -483,9 +483,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr 
[[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp index 28d63dbf8c4a9..c0bb4a6d6cc82 100644 --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -85,16 +85,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -124,7 +124,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -139,19 +139,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 @@ -429,9 +429,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/target_task_affinity_codegen.cpp b/clang/test/OpenMP/target_task_affinity_codegen.cpp index 85c5d63a6cd9c..53960cee4b730 100644 --- a/clang/test/OpenMP/target_task_affinity_codegen.cpp +++ b/clang/test/OpenMP/target_task_affinity_codegen.cpp @@ -76,7 +76,7 @@ int main() { // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 @@ -102,7 +102,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[B]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[B]], align 
8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP18]], i64 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP19]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 @@ -174,9 +174,9 @@ int main() { // CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 8, ptr @.omp_task_entry.) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 0 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1023 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i64 1023 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1 // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i64 @@ -299,7 +299,7 @@ int main() { // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP2]], i32 0 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 @@ -325,7 +325,7 @@ int main() { // CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[B]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[B]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP18]], i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 @@ -397,9 +397,9 @@ int main() { // CHECK3-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 24, i32 4, ptr @.omp_task_entry.) 
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1023 +// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1023 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1 // CHECK3-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i32 // CHECK3-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i32 @@ -587,9 +587,9 @@ int main() { // CHECK9-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 8, ptr @.omp_task_entry.) // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i64 0, i64 0 // CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 0 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 0 // CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1023 +// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i64 1023 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1 // CHECK9-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK9-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i64 @@ -709,9 +709,9 @@ int main() { // CHECK11-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 24, i32 4, ptr @.omp_task_entry.) 
// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x %struct.kmp_task_affinity_info_t], ptr [[DOTAFFS_ARR_ADDR]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1023 +// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 1023 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i32 1 // CHECK11-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i32 // CHECK11-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i32 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp index 6f671dbb27abb..2c36b410af064 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -91,16 +91,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -130,7 +130,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -145,19 +145,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 @@ -435,16 +435,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 0 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP7]], i64 9 // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = 
getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] @@ -474,7 +474,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] // CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -489,19 +489,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..4, ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP32]], i64 0 // CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i64 0 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP36]], i64 9 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 @@ -822,9 +822,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // 
CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp index 5e038989ab6dd..c8211f475c7fc 100644 --- a/clang/test/OpenMP/target_update_codegen.cpp +++ b/clang/test/OpenMP/target_update_codegen.cpp @@ -1118,9 +1118,9 @@ struct ST { void foo(int arg) { ST arr[3][4]; // CK20: [[DIMS:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]], - // CK20: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [[STRUCT_ST]]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK20: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [3 x [4 x [[STRUCT_ST]]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 // CK20: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [[STRUCT_ST]]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 - // CK20: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ARRAY_DECAY]], {{.+}} + // CK20: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[ARRAY_DECAY]], {{.+}} // CK20: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0 // CK20: store ptr [[ARR]], ptr [[BP0]], // CK20: [[P0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[P:%.+]], {{.+}} 0, {{.+}} 0 @@ -1186,9 +1186,9 @@ struct ST { // CK21: _ZN2ST3fooEv void foo() { // CK21: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]], - // CK21: [[ARRAY_IDX:%.+]] = getelementptr inbounds [10 x [10 x [10 x ptr]]], ptr [[DPTR:%.+]], {{.+}} 0, {{.+}} 0 + // CK21: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [10 x [10 x [10 x ptr]]], ptr [[DPTR:%.+]], {{.+}} 0, {{.+}} 0 // CK21: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [10 x [10 x ptr]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 - // CK21: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARRAY_DECAY]], {{.+}} 1 + // CK21: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [10 x ptr], ptr [[ARRAY_DECAY]], {{.+}} 1 // CK21: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 // CK21: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, ptr [[ARRAY_DECAY_2]], {{.+}} 0 // CK21: [[BP0:%.+]] = getelementptr inbounds [2 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0 @@ -1262,9 +1262,9 @@ struct ST { // CK22: _ZN2ST3fooEPA10_Pi void foo(int *arr[5][10]) { // CK22: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]], - // CK22: [[ARRAY_IDX:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARR:%.+]], {{.+}} 0 + // CK22: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [10 x ptr], ptr [[ARR:%.+]], {{.+}} 0 // CK22: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [10 x ptr], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 - // CK22: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds ptr, ptr [[ARRAY_DECAY:%.+]], {{.+}} 1 + // CK22: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds nuw ptr, ptr [[ARRAY_DECAY:%.+]], {{.+}} 1 // CK22: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], {{.+}} 0, 
{{.+}} 0 // CK22: [[P0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[P:%.+]], i{{.+}} 0, i{{.+}} 0 // CK22: [[DIM_1:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], ptr [[DIMS]], {{.+}} 0, {{.+}} 0 @@ -1338,11 +1338,11 @@ void foo(int arg) { float farr[5][5][5]; // CK23: [[ARG_ADDR:%.+]] = alloca i32, // CK23: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]], - // CK23: [[ARRAY_IDX:%.+]] = getelementptr inbounds [5 x [5 x [5 x float]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK23: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [5 x [5 x [5 x float]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 // CK23: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [5 x [5 x float]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 - // CK23: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x float], ptr [[ARRAY_DECAY]], {{.+}} + // CK23: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [5 x float], ptr [[ARRAY_DECAY]], {{.+}} // CK23: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x float], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 - // CK23: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds float, ptr [[ARRAY_DECAY_2]], {{.+}} + // CK23: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds nuw float, ptr [[ARRAY_DECAY_2]], {{.+}} // CK23: [[MUL:%.+]] = mul nuw i64 4, // CK23: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], {{.+}} 0, {{.+}} 0 // CK23: store ptr [[ARR]], ptr [[BP0]], @@ -1411,11 +1411,11 @@ void foo(int arg) { void foo(int arg) { double darr[3][4][5]; // CK24: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]], - // CK24: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [5 x double]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK24: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [3 x [4 x [5 x double]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 // CK24: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [5 x double]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 - // CK24: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x double], ptr [[ARRAY_DECAY]], {{.+}} + // CK24: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [5 x double], ptr [[ARRAY_DECAY]], {{.+}} // CK24: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x double], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 - // CK24: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds double, ptr [[ARRAY_DECAY_2]], {{.+}} + // CK24: [[ARRAY_IDX_2:%.+]] = getelementptr inbounds nuw double, ptr [[ARRAY_DECAY_2]], {{.+}} // CK24: [[MUL:%.+]] = mul nuw i64 8, // CK24: [[SUB:%.+]] = sub nuw i64 4, [[ARG:%.+]] // CK24: [[LEN:%.+]] = udiv {{.+}} [[SUB]], 1 @@ -1488,15 +1488,15 @@ void foo(int arg) { // CK25: [[DIMS:%.+]] = alloca [4 x [[STRUCT_DESCRIPTOR]]], // CK25: [[DIMS_2:%.+]] = alloca [3 x [[STRUCT_DESCRIPTOR]]], - // CK25: [[ARRAY_IDX:%.+]] = getelementptr inbounds [3 x [4 x [5 x i32]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK25: [[ARRAY_IDX:%.+]] = getelementptr inbounds nuw [3 x [4 x [5 x i32]]], ptr [[ARR:%.+]], {{.+}} 0, {{.+}} 0 // CK25: [[ARRAY_DECAY:%.+]] = getelementptr inbounds [4 x [5 x i32]], ptr [[ARRAY_IDX]], {{.+}} 0, {{.+}} 0 - // CK25: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds [5 x i32], ptr [[ARRAY_DECAY]], {{.+}} + // CK25: [[ARRAY_IDX_1:%.+]] = getelementptr inbounds nuw [5 x i32], ptr [[ARRAY_DECAY]], {{.+}} // CK25: [[ARRAY_DECAY_2:%.+]] = getelementptr inbounds [5 x i32], ptr [[ARRAY_IDX_1]], {{.+}} 0, {{.+}} 0 - // CK25: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds {{.+}}, ptr [[ARRAY_DECAY_2]], {{.+}} 1 + // CK25: [[ARRAY_IDX_3:%.+]] = getelementptr inbounds nuw {{.+}}, ptr [[ARRAY_DECAY_2]], {{.+}} 1 // CK25: [[LEN:%.+]] = sub nuw 
i64 4, [[ARG_ADDR:%.+]] - // CK25: [[ARRAY_IDX_4:%.+]] = getelementptr inbounds [4 x [3 x float]], ptr [[FARR:%.+]], {{.+}} 0, {{.+}} 0 + // CK25: [[ARRAY_IDX_4:%.+]] = getelementptr inbounds nuw [4 x [3 x float]], ptr [[FARR:%.+]], {{.+}} 0, {{.+}} 0 // CK25: [[ARRAY_DECAY_5:%.+]] = getelementptr inbounds [3 x float], ptr [[ARRAY_IDX_4]], {{.+}} 0, {{.+}} 0 - // CK25: [[ARRAY_IDX_6:%.+]] = getelementptr inbounds float, ptr [[ARRAY_DECAY_5:%.+]], {{.+}} 1 + // CK25: [[ARRAY_IDX_6:%.+]] = getelementptr inbounds nuw float, ptr [[ARRAY_DECAY_5:%.+]], {{.+}} 1 // CK25: [[BP0:%.+]] = getelementptr inbounds [3 x ptr], ptr [[BP:%.+]], i{{.+}} 0, i{{.+}} 0 // CK25: [[P0:%.+]] = getelementptr inbounds [3 x ptr], ptr [[P:%.+]], i{{.+}} 0, i{{.+}} 0 // CK25: [[DIM_1:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], ptr [[DIMS]], {{.+}} 0, {{.+}} 0 diff --git a/clang/test/OpenMP/task_codegen.c b/clang/test/OpenMP/task_codegen.c index 0d10cbce4aa80..d08eb3762d5c9 100644 --- a/clang/test/OpenMP/task_codegen.c +++ b/clang/test/OpenMP/task_codegen.c @@ -183,7 +183,7 @@ for (int i = 0; i < 10; ++i) // CHECK: [[A:%.+]] = load ptr, ptr [[A_ADDR:%.+]], // CHECK: [[K:%.+]] = load i32, ptr [[K_ADDR]], // CHECK: [[IDX:%.+]] = zext i32 [[K]] to i64 - // CHECK: [[AK_ADDR:%.+]] = getelementptr inbounds ptr, ptr [[A]], i64 [[IDX]] + // CHECK: [[AK_ADDR:%.+]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[IDX]] // CHECK: [[AK:%.+]] = load ptr, ptr [[AK_ADDR]], // CHECK: [[I:%.+]] = load i32, ptr [[I_ADDR]], // CHECK: [[IDX:%.+]] = sext i32 [[I]] to i64 diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index b256c41132ed3..c3e6d9e6b1cf7 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -309,9 +309,9 @@ void test_omp_all_memory() // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2 // CHECK1-NEXT: store i8 1, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK1-NEXT: [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]] // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 // CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64 @@ -346,13 +346,13 @@ void test_omp_all_memory() // CHECK1-NEXT: [[TMP58:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-NEXT: [[TMP59:%.*]] = sext i8 [[TMP58]] to i64 // CHECK1-NEXT: [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] // CHECK1-NEXT: [[TMP61:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-NEXT: [[TMP62:%.*]] = sext i8 [[TMP61]] to i64 // CHECK1-NEXT: [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]] -// 
CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] // CHECK1-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1 // CHECK1-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK1-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64 @@ -384,13 +384,13 @@ void test_omp_all_memory() // CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-NEXT: [[TMP84:%.*]] = sext i8 [[TMP83]] to i64 // CHECK1-NEXT: [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] // CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-NEXT: [[TMP87:%.*]] = sext i8 [[TMP86]] to i64 // CHECK1-NEXT: [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] // CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1 // CHECK1-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64 // CHECK1-NEXT: [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64 @@ -427,8 +427,8 @@ void test_omp_all_memory() // CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2 // CHECK1-NEXT: store i8 3, ptr [[TMP111]], align 8 // CHECK1-NEXT: [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3 +// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3 // CHECK1-NEXT: [[TMP113:%.*]] = load i32, ptr @a, align 4 // CHECK1-NEXT: [[TMP114:%.*]] = sext i32 [[TMP113]] to i64 // CHECK1-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1 @@ -436,8 +436,8 @@ void test_omp_all_memory() // CHECK1-NEXT: [[TMP116:%.*]] = sext i32 [[TMP115]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]] // CHECK1-NEXT: [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]] +// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] // CHECK1-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK1-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK1-NEXT: [[TMP120:%.*]] = ptrtoint 
ptr [[TMP118]] to i64 @@ -1432,9 +1432,9 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2 // CHECK1-51-NEXT: store i8 1, ptr [[TMP33]], align 8 // CHECK1-51-NEXT: [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK1-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK1-51-NEXT: [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]] +// CHECK1-51-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]] // CHECK1-51-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 // CHECK1-51-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK1-51-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64 @@ -1469,13 +1469,13 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP58:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-51-NEXT: [[TMP59:%.*]] = sext i8 [[TMP58]] to i64 // CHECK1-51-NEXT: [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]] -// CHECK1-51-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] +// CHECK1-51-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]] +// CHECK1-51-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] // CHECK1-51-NEXT: [[TMP61:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-51-NEXT: [[TMP62:%.*]] = sext i8 [[TMP61]] to i64 // CHECK1-51-NEXT: [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]] -// CHECK1-51-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] +// CHECK1-51-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]] +// CHECK1-51-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] // CHECK1-51-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1 // CHECK1-51-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK1-51-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64 @@ -1507,13 +1507,13 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP83:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-51-NEXT: [[TMP84:%.*]] = sext i8 [[TMP83]] to i64 // CHECK1-51-NEXT: [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]] -// CHECK1-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] +// CHECK1-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]] +// CHECK1-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] // CHECK1-51-NEXT: [[TMP86:%.*]] = load i8, ptr [[B]], align 1 // CHECK1-51-NEXT: [[TMP87:%.*]] = sext i8 [[TMP86]] to i64 // CHECK1-51-NEXT: [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]] -// CHECK1-51-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] +// CHECK1-51-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]] +// CHECK1-51-NEXT: [[ARRAYIDX18:%.*]] = 
getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] // CHECK1-51-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1 // CHECK1-51-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64 // CHECK1-51-NEXT: [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64 @@ -1550,8 +1550,8 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2 // CHECK1-51-NEXT: store i8 3, ptr [[TMP111]], align 8 // CHECK1-51-NEXT: [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]] -// CHECK1-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3 +// CHECK1-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]] +// CHECK1-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3 // CHECK1-51-NEXT: [[TMP113:%.*]] = load i32, ptr @a, align 4 // CHECK1-51-NEXT: [[TMP114:%.*]] = sext i32 [[TMP113]] to i64 // CHECK1-51-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1 @@ -1559,8 +1559,8 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP116:%.*]] = sext i32 [[TMP115]] to i64 // CHECK1-51-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]] // CHECK1-51-NEXT: [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]] -// CHECK1-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] +// CHECK1-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]] +// CHECK1-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] // CHECK1-51-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK1-51-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK1-51-NEXT: [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64 @@ -1595,8 +1595,8 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP136]], i32 0, i32 2 // CHECK1-51-NEXT: store i8 8, ptr [[TMP139]], align 8 // CHECK1-51-NEXT: [[TMP140:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP140]] -// CHECK1-51-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX31]], i64 3 +// CHECK1-51-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP140]] +// CHECK1-51-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX31]], i64 3 // CHECK1-51-NEXT: [[TMP141:%.*]] = load i32, ptr @a, align 4 // CHECK1-51-NEXT: [[TMP142:%.*]] = sext i32 [[TMP141]] to i64 // CHECK1-51-NEXT: [[LEN_SUB_133:%.*]] = sub nsw i64 [[TMP142]], 1 @@ -1604,8 +1604,8 @@ void test_omp_all_memory() // CHECK1-51-NEXT: [[TMP144:%.*]] = sext i32 [[TMP143]] to i64 // CHECK1-51-NEXT: [[LB_ADD_LEN34:%.*]] = add nsw i64 -1, [[TMP144]] // CHECK1-51-NEXT: [[TMP145:%.*]] = mul nsw i64 [[LB_ADD_LEN34]], [[TMP2]] -// CHECK1-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP145]] -// CHECK1-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]] +// CHECK1-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP145]] +// CHECK1-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr 
[[ARRAYIDX35]], i64 [[LEN_SUB_133]] // CHECK1-51-NEXT: [[TMP146:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1 // CHECK1-51-NEXT: [[TMP147:%.*]] = ptrtoint ptr [[ARRAYIDX32]] to i64 // CHECK1-51-NEXT: [[TMP148:%.*]] = ptrtoint ptr [[TMP146]] to i64 @@ -3040,9 +3040,9 @@ void test_omp_all_memory() // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2 // CHECK2-NEXT: store i8 1, ptr [[TMP33]], align 8 // CHECK2-NEXT: [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK2-NEXT: [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]] // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 // CHECK2-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK2-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64 @@ -3077,13 +3077,13 @@ void test_omp_all_memory() // CHECK2-NEXT: [[TMP58:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-NEXT: [[TMP59:%.*]] = sext i8 [[TMP58]] to i64 // CHECK2-NEXT: [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]] -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]] +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] // CHECK2-NEXT: [[TMP61:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-NEXT: [[TMP62:%.*]] = sext i8 [[TMP61]] to i64 // CHECK2-NEXT: [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]] -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]] +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] // CHECK2-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1 // CHECK2-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK2-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64 @@ -3115,13 +3115,13 @@ void test_omp_all_memory() // CHECK2-NEXT: [[TMP83:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-NEXT: [[TMP84:%.*]] = sext i8 [[TMP83]] to i64 // CHECK2-NEXT: [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]] -// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] +// CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]] +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] // CHECK2-NEXT: [[TMP86:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-NEXT: [[TMP87:%.*]] = sext i8 [[TMP86]] to i64 // CHECK2-NEXT: [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]] -// CHECK2-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] +// CHECK2-NEXT: 
[[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]] +// CHECK2-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] // CHECK2-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1 // CHECK2-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64 // CHECK2-NEXT: [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64 @@ -3158,8 +3158,8 @@ void test_omp_all_memory() // CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2 // CHECK2-NEXT: store i8 3, ptr [[TMP111]], align 8 // CHECK2-NEXT: [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]] -// CHECK2-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3 +// CHECK2-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]] +// CHECK2-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3 // CHECK2-NEXT: [[TMP113:%.*]] = load i32, ptr @a, align 4 // CHECK2-NEXT: [[TMP114:%.*]] = sext i32 [[TMP113]] to i64 // CHECK2-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1 @@ -3167,8 +3167,8 @@ void test_omp_all_memory() // CHECK2-NEXT: [[TMP116:%.*]] = sext i32 [[TMP115]] to i64 // CHECK2-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]] // CHECK2-NEXT: [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]] -// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP117]] -// CHECK2-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] +// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]] +// CHECK2-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] // CHECK2-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK2-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK2-NEXT: [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64 @@ -4163,9 +4163,9 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP30]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 1, ptr [[TMP33]], align 8 // CHECK2-51-NEXT: [[TMP34:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK2-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK2-51-NEXT: [[TMP35:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP35]] +// CHECK2-51-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP35]] // CHECK2-51-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 // CHECK2-51-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK2-51-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP36]] to i64 @@ -4200,13 +4200,13 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP58:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-51-NEXT: [[TMP59:%.*]] = sext i8 [[TMP58]] to i64 // CHECK2-51-NEXT: [[TMP60:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP60]] -// CHECK2-51-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] +// CHECK2-51-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP60]] +// 
CHECK2-51-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX7]], i64 [[TMP59]] // CHECK2-51-NEXT: [[TMP61:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-51-NEXT: [[TMP62:%.*]] = sext i8 [[TMP61]] to i64 // CHECK2-51-NEXT: [[TMP63:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP63]] -// CHECK2-51-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] +// CHECK2-51-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP63]] +// CHECK2-51-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX9]], i64 [[TMP62]] // CHECK2-51-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX10]], i32 1 // CHECK2-51-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK2-51-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP64]] to i64 @@ -4238,13 +4238,13 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP83:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-51-NEXT: [[TMP84:%.*]] = sext i8 [[TMP83]] to i64 // CHECK2-51-NEXT: [[TMP85:%.*]] = mul nsw i64 4, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP85]] -// CHECK2-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] +// CHECK2-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP85]] +// CHECK2-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP84]] // CHECK2-51-NEXT: [[TMP86:%.*]] = load i8, ptr [[B]], align 1 // CHECK2-51-NEXT: [[TMP87:%.*]] = sext i8 [[TMP86]] to i64 // CHECK2-51-NEXT: [[TMP88:%.*]] = mul nsw i64 9, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP88]] -// CHECK2-51-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] +// CHECK2-51-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP88]] +// CHECK2-51-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX17]], i64 [[TMP87]] // CHECK2-51-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[ARRAYIDX18]], i32 1 // CHECK2-51-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[ARRAYIDX16]] to i64 // CHECK2-51-NEXT: [[TMP91:%.*]] = ptrtoint ptr [[TMP89]] to i64 @@ -4281,8 +4281,8 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP108]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 3, ptr [[TMP111]], align 8 // CHECK2-51-NEXT: [[TMP112:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP112]] -// CHECK2-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 3 +// CHECK2-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP112]] +// CHECK2-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 3 // CHECK2-51-NEXT: [[TMP113:%.*]] = load i32, ptr @a, align 4 // CHECK2-51-NEXT: [[TMP114:%.*]] = sext i32 [[TMP113]] to i64 // CHECK2-51-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP114]], 1 @@ -4290,8 +4290,8 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP116:%.*]] = sext i32 [[TMP115]] to i64 // CHECK2-51-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP116]] // CHECK2-51-NEXT: [[TMP117:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 
[[TMP117]] -// CHECK2-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] +// CHECK2-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP117]] +// CHECK2-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[LEN_SUB_1]] // CHECK2-51-NEXT: [[TMP118:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK2-51-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK2-51-NEXT: [[TMP120:%.*]] = ptrtoint ptr [[TMP118]] to i64 @@ -4326,8 +4326,8 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP136]], i32 0, i32 2 // CHECK2-51-NEXT: store i8 8, ptr [[TMP139]], align 8 // CHECK2-51-NEXT: [[TMP140:%.*]] = mul nsw i64 0, [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP140]] -// CHECK2-51-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX31]], i64 3 +// CHECK2-51-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP140]] +// CHECK2-51-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX31]], i64 3 // CHECK2-51-NEXT: [[TMP141:%.*]] = load i32, ptr @a, align 4 // CHECK2-51-NEXT: [[TMP142:%.*]] = sext i32 [[TMP141]] to i64 // CHECK2-51-NEXT: [[LEN_SUB_133:%.*]] = sub nsw i64 [[TMP142]], 1 @@ -4335,8 +4335,8 @@ void test_omp_all_memory() // CHECK2-51-NEXT: [[TMP144:%.*]] = sext i32 [[TMP143]] to i64 // CHECK2-51-NEXT: [[LB_ADD_LEN34:%.*]] = add nsw i64 -1, [[TMP144]] // CHECK2-51-NEXT: [[TMP145:%.*]] = mul nsw i64 [[LB_ADD_LEN34]], [[TMP2]] -// CHECK2-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP145]] -// CHECK2-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]] +// CHECK2-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP145]] +// CHECK2-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_133]] // CHECK2-51-NEXT: [[TMP146:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1 // CHECK2-51-NEXT: [[TMP147:%.*]] = ptrtoint ptr [[ARRAYIDX32]] to i64 // CHECK2-51-NEXT: [[TMP148:%.*]] = ptrtoint ptr [[TMP146]] to i64 @@ -5773,9 +5773,9 @@ void test_omp_all_memory() // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2 // CHECK3-NEXT: store i8 1, ptr [[TMP32]], align 8 // CHECK3-NEXT: [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]] // CHECK3-NEXT: [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1 // CHECK3-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK3-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64 @@ -5814,13 +5814,13 @@ void test_omp_all_memory() // CHECK3-NEXT: [[TMP57:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-NEXT: [[TMP58:%.*]] = sext i8 [[TMP57]] to i64 // CHECK3-NEXT: [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]] -// CHECK3-NEXT: 
[[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] // CHECK3-NEXT: [[TMP60:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-NEXT: [[TMP61:%.*]] = sext i8 [[TMP60]] to i64 // CHECK3-NEXT: [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]] -// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] +// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]] +// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] // CHECK3-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1 // CHECK3-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 // CHECK3-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64 @@ -5854,13 +5854,13 @@ void test_omp_all_memory() // CHECK3-NEXT: [[TMP82:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-NEXT: [[TMP83:%.*]] = sext i8 [[TMP82]] to i64 // CHECK3-NEXT: [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]] -// CHECK3-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] +// CHECK3-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]] +// CHECK3-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] // CHECK3-NEXT: [[TMP85:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-NEXT: [[TMP86:%.*]] = sext i8 [[TMP85]] to i64 // CHECK3-NEXT: [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]] -// CHECK3-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] +// CHECK3-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]] +// CHECK3-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] // CHECK3-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK3-NEXT: [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK3-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64 @@ -5899,8 +5899,8 @@ void test_omp_all_memory() // CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2 // CHECK3-NEXT: store i8 3, ptr [[TMP110]], align 8 // CHECK3-NEXT: [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]] -// CHECK3-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3 +// CHECK3-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]] +// CHECK3-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3 // CHECK3-NEXT: [[TMP112:%.*]] = load i32, ptr @a, align 4 // CHECK3-NEXT: [[TMP113:%.*]] = sext i32 [[TMP112]] to i64 // CHECK3-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1 @@ -5908,8 +5908,8 @@ void test_omp_all_memory() // CHECK3-NEXT: [[TMP115:%.*]] = sext i32 [[TMP114]] to i64 // CHECK3-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]] // CHECK3-NEXT: [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]] -// CHECK3-NEXT: 
[[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]] -// CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] +// CHECK3-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]] +// CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] // CHECK3-NEXT: [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1 // CHECK3-NEXT: [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64 // CHECK3-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64 @@ -6789,9 +6789,9 @@ void test_omp_all_memory() // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2 // CHECK4-NEXT: store i8 1, ptr [[TMP32]], align 8 // CHECK4-NEXT: [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]] +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]] // CHECK4-NEXT: [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK4-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1 // CHECK4-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK4-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64 @@ -6830,13 +6830,13 @@ void test_omp_all_memory() // CHECK4-NEXT: [[TMP57:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-NEXT: [[TMP58:%.*]] = sext i8 [[TMP57]] to i64 // CHECK4-NEXT: [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]] -// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] +// CHECK4-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]] +// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] // CHECK4-NEXT: [[TMP60:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-NEXT: [[TMP61:%.*]] = sext i8 [[TMP60]] to i64 // CHECK4-NEXT: [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]] -// CHECK4-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] +// CHECK4-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]] +// CHECK4-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] // CHECK4-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1 // CHECK4-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 // CHECK4-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64 @@ -6870,13 +6870,13 @@ void test_omp_all_memory() // CHECK4-NEXT: [[TMP82:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-NEXT: [[TMP83:%.*]] = sext i8 [[TMP82]] to i64 // CHECK4-NEXT: [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]] -// CHECK4-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] +// CHECK4-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]] +// CHECK4-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] // CHECK4-NEXT: [[TMP85:%.*]] = load i8, ptr 
[[B]], align 1 // CHECK4-NEXT: [[TMP86:%.*]] = sext i8 [[TMP85]] to i64 // CHECK4-NEXT: [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]] -// CHECK4-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] +// CHECK4-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]] +// CHECK4-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] // CHECK4-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK4-NEXT: [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK4-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64 @@ -6915,8 +6915,8 @@ void test_omp_all_memory() // CHECK4-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2 // CHECK4-NEXT: store i8 3, ptr [[TMP110]], align 8 // CHECK4-NEXT: [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]] -// CHECK4-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3 +// CHECK4-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]] +// CHECK4-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3 // CHECK4-NEXT: [[TMP112:%.*]] = load i32, ptr @a, align 4 // CHECK4-NEXT: [[TMP113:%.*]] = sext i32 [[TMP112]] to i64 // CHECK4-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1 @@ -6924,8 +6924,8 @@ void test_omp_all_memory() // CHECK4-NEXT: [[TMP115:%.*]] = sext i32 [[TMP114]] to i64 // CHECK4-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]] // CHECK4-NEXT: [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]] -// CHECK4-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]] -// CHECK4-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] +// CHECK4-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]] +// CHECK4-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] // CHECK4-NEXT: [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1 // CHECK4-NEXT: [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64 // CHECK4-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64 @@ -7808,9 +7808,9 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 1, ptr [[TMP32]], align 8 // CHECK3-51-NEXT: [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]] +// CHECK3-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]] // CHECK3-51-NEXT: [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK3-51-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK3-51-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1 // CHECK3-51-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK3-51-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64 @@ -7849,13 +7849,13 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP57:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-51-NEXT: [[TMP58:%.*]] = sext i8 [[TMP57]] to i64 // CHECK3-51-NEXT: 
[[TMP59:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]] -// CHECK3-51-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] +// CHECK3-51-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]] +// CHECK3-51-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] // CHECK3-51-NEXT: [[TMP60:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-51-NEXT: [[TMP61:%.*]] = sext i8 [[TMP60]] to i64 // CHECK3-51-NEXT: [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]] -// CHECK3-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] +// CHECK3-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]] +// CHECK3-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] // CHECK3-51-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1 // CHECK3-51-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 // CHECK3-51-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64 @@ -7889,13 +7889,13 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP82:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-51-NEXT: [[TMP83:%.*]] = sext i8 [[TMP82]] to i64 // CHECK3-51-NEXT: [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]] -// CHECK3-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] +// CHECK3-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]] +// CHECK3-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] // CHECK3-51-NEXT: [[TMP85:%.*]] = load i8, ptr [[B]], align 1 // CHECK3-51-NEXT: [[TMP86:%.*]] = sext i8 [[TMP85]] to i64 // CHECK3-51-NEXT: [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]] -// CHECK3-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] +// CHECK3-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]] +// CHECK3-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] // CHECK3-51-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK3-51-NEXT: [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK3-51-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64 @@ -7934,8 +7934,8 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 3, ptr [[TMP110]], align 8 // CHECK3-51-NEXT: [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]] -// CHECK3-51-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3 +// CHECK3-51-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]] +// CHECK3-51-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3 // CHECK3-51-NEXT: [[TMP112:%.*]] = load i32, ptr @a, align 4 // CHECK3-51-NEXT: [[TMP113:%.*]] = sext i32 [[TMP112]] to i64 // CHECK3-51-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 
1 @@ -7943,8 +7943,8 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP115:%.*]] = sext i32 [[TMP114]] to i64 // CHECK3-51-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]] // CHECK3-51-NEXT: [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]] -// CHECK3-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] +// CHECK3-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]] +// CHECK3-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] // CHECK3-51-NEXT: [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1 // CHECK3-51-NEXT: [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64 // CHECK3-51-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64 @@ -7981,8 +7981,8 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP135]], i32 0, i32 2 // CHECK3-51-NEXT: store i8 8, ptr [[TMP138]], align 8 // CHECK3-51-NEXT: [[TMP139:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP139]] -// CHECK3-51-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX43]], i64 3 +// CHECK3-51-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP139]] +// CHECK3-51-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX43]], i64 3 // CHECK3-51-NEXT: [[TMP140:%.*]] = load i32, ptr @a, align 4 // CHECK3-51-NEXT: [[TMP141:%.*]] = sext i32 [[TMP140]] to i64 // CHECK3-51-NEXT: [[LEN_SUB_145:%.*]] = sub nsw i64 [[TMP141]], 1 @@ -7990,8 +7990,8 @@ void test_omp_all_memory() // CHECK3-51-NEXT: [[TMP143:%.*]] = sext i32 [[TMP142]] to i64 // CHECK3-51-NEXT: [[LB_ADD_LEN46:%.*]] = add nsw i64 -1, [[TMP143]] // CHECK3-51-NEXT: [[TMP144:%.*]] = mul nsw i64 [[LB_ADD_LEN46]], [[TMP1]] -// CHECK3-51-NEXT: [[ARRAYIDX47:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP144]] -// CHECK3-51-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX47]], i64 [[LEN_SUB_145]] +// CHECK3-51-NEXT: [[ARRAYIDX47:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP144]] +// CHECK3-51-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX47]], i64 [[LEN_SUB_145]] // CHECK3-51-NEXT: [[TMP145:%.*]] = getelementptr i32, ptr [[ARRAYIDX48]], i32 1 // CHECK3-51-NEXT: [[TMP146:%.*]] = ptrtoint ptr [[ARRAYIDX44]] to i64 // CHECK3-51-NEXT: [[TMP147:%.*]] = ptrtoint ptr [[TMP145]] to i64 @@ -9323,9 +9323,9 @@ void test_omp_all_memory() // CHECK4-51-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP29]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 1, ptr [[TMP32]], align 8 // CHECK4-51-NEXT: [[TMP33:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP33]] +// CHECK4-51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP33]] // CHECK4-51-NEXT: [[TMP34:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP34]] +// CHECK4-51-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP34]] // CHECK4-51-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX4]], i32 1 // CHECK4-51-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 // CHECK4-51-NEXT: 
[[TMP37:%.*]] = ptrtoint ptr [[TMP35]] to i64 @@ -9364,13 +9364,13 @@ void test_omp_all_memory() // CHECK4-51-NEXT: [[TMP57:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-51-NEXT: [[TMP58:%.*]] = sext i8 [[TMP57]] to i64 // CHECK4-51-NEXT: [[TMP59:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP59]] -// CHECK4-51-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] +// CHECK4-51-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP59]] +// CHECK4-51-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX13]], i64 [[TMP58]] // CHECK4-51-NEXT: [[TMP60:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-51-NEXT: [[TMP61:%.*]] = sext i8 [[TMP60]] to i64 // CHECK4-51-NEXT: [[TMP62:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP62]] -// CHECK4-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] +// CHECK4-51-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP62]] +// CHECK4-51-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX15]], i64 [[TMP61]] // CHECK4-51-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX16]], i32 1 // CHECK4-51-NEXT: [[TMP64:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 // CHECK4-51-NEXT: [[TMP65:%.*]] = ptrtoint ptr [[TMP63]] to i64 @@ -9404,13 +9404,13 @@ void test_omp_all_memory() // CHECK4-51-NEXT: [[TMP82:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-51-NEXT: [[TMP83:%.*]] = sext i8 [[TMP82]] to i64 // CHECK4-51-NEXT: [[TMP84:%.*]] = mul nsw i64 4, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP84]] -// CHECK4-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] +// CHECK4-51-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP84]] +// CHECK4-51-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX23]], i64 [[TMP83]] // CHECK4-51-NEXT: [[TMP85:%.*]] = load i8, ptr [[B]], align 1 // CHECK4-51-NEXT: [[TMP86:%.*]] = sext i8 [[TMP85]] to i64 // CHECK4-51-NEXT: [[TMP87:%.*]] = mul nsw i64 9, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP87]] -// CHECK4-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] +// CHECK4-51-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP87]] +// CHECK4-51-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX25]], i64 [[TMP86]] // CHECK4-51-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[ARRAYIDX26]], i32 1 // CHECK4-51-NEXT: [[TMP89:%.*]] = ptrtoint ptr [[ARRAYIDX24]] to i64 // CHECK4-51-NEXT: [[TMP90:%.*]] = ptrtoint ptr [[TMP88]] to i64 @@ -9449,8 +9449,8 @@ void test_omp_all_memory() // CHECK4-51-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP107]], i32 0, i32 2 // CHECK4-51-NEXT: store i8 3, ptr [[TMP110]], align 8 // CHECK4-51-NEXT: [[TMP111:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP111]] -// CHECK4-51-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX33]], i64 3 +// CHECK4-51-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP111]] +// CHECK4-51-NEXT: [[ARRAYIDX34:%.*]] = 
getelementptr inbounds nuw i32, ptr [[ARRAYIDX33]], i64 3 // CHECK4-51-NEXT: [[TMP112:%.*]] = load i32, ptr @a, align 4 // CHECK4-51-NEXT: [[TMP113:%.*]] = sext i32 [[TMP112]] to i64 // CHECK4-51-NEXT: [[LEN_SUB_1:%.*]] = sub nsw i64 [[TMP113]], 1 @@ -9458,8 +9458,8 @@ void test_omp_all_memory() // CHECK4-51-NEXT: [[TMP115:%.*]] = sext i32 [[TMP114]] to i64 // CHECK4-51-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP115]] // CHECK4-51-NEXT: [[TMP116:%.*]] = mul nsw i64 [[LB_ADD_LEN]], [[TMP1]] -// CHECK4-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP116]] -// CHECK4-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] +// CHECK4-51-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i32, ptr [[VLA]], i64 [[TMP116]] +// CHECK4-51-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYIDX35]], i64 [[LEN_SUB_1]] // CHECK4-51-NEXT: [[TMP117:%.*]] = getelementptr i32, ptr [[ARRAYIDX36]], i32 1 // CHECK4-51-NEXT: [[TMP118:%.*]] = ptrtoint ptr [[ARRAYIDX34]] to i64 // CHECK4-51-NEXT: [[TMP119:%.*]] = ptrtoint ptr [[TMP117]] to i64 diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp index aa2a478137990..29dc12978d7d9 100644 --- a/clang/test/OpenMP/task_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp @@ -90,7 +90,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -105,7 +105,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1 @@ -120,7 +120,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: 
[[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1 @@ -138,7 +138,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]]) // CHECK1-NEXT: store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 @@ -153,7 +153,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..6, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 diff --git a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp index faf86479dfdae..a8577c7a13579 100644 --- a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp @@ -67,7 +67,7 @@ int main(int argc, char **argv) { // CHECK-DAG: [[A_REF]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA:%[^,]+]], i32 0, i32 0 // CHECK-DAG: store ptr [[A]], ptr [[A_REF:[^,]+]], // CHECK-DAG: [[A_REF]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA]], i32 0, i32 1 -// CHECK-DAG: [[GEPA]] = getelementptr inbounds [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64 +// CHECK-DAG: [[GEPA]] = getelementptr inbounds nuw [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64 // CHECK-DAG: [[TMP6:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA]], i32 0, i32 2 // CHECK-DAG: store i64 4, ptr [[TMP6]], // CHECK-DAG: [[TMP7:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPA]], i32 0, i32 3 @@ -82,7 +82,7 @@ int main(int argc, char **argv) { // CHECK-DAG: [[TMP12]] = getelementptr inbounds nuw [[T1]], ptr 
[[GEPB:%[^,]+]], i32 0, i32 0 // CHECK-DAG: store ptr [[B]], ptr [[TMP12:%[^,]+]], // CHECK-DAG: [[TMP12]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB]], i32 0, i32 1 -// CHECK-DAG: [[GEPB]] = getelementptr inbounds [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64 +// CHECK-DAG: [[GEPB]] = getelementptr inbounds nuw [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64 // CHECK-DAG: [[TMP14:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB]], i32 0, i32 2 // CHECK-DAG: store i64 4, ptr [[TMP14]], // CHECK-DAG: [[TMP15:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPB]], i32 0, i32 3 @@ -97,7 +97,7 @@ int main(int argc, char **argv) { // CHECK-DAG: [[TMP20]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC:%[^,]+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARGC_ADDR]], ptr [[TMP20:%[^,]+]], // CHECK-DAG: [[TMP20]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC]], i32 0, i32 1 -// CHECK-DAG: [[GEPARGC]] = getelementptr inbounds [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64 +// CHECK-DAG: [[GEPARGC]] = getelementptr inbounds nuw [3 x [[T1]]], ptr [[RD_IN1]], i64 0, i64 // CHECK-DAG: [[TMP22:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC]], i32 0, i32 2 // CHECK-DAG: store i64 4, ptr [[TMP22]], // CHECK-DAG: [[TMP23:%.+]] = getelementptr inbounds nuw [[T1]], ptr [[GEPARGC]], i32 0, i32 3 @@ -116,7 +116,7 @@ int main(int argc, char **argv) { // CHECK-DAG: [[TMP30]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC:%[^,]+]], i32 0, i32 0 // CHECK-DAG: store ptr [[C]], ptr [[TMP30:%[^,]+]], // CHECK-DAG: [[TMP30]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC]], i32 0, i32 1 -// CHECK-DAG: [[GEPC]] = getelementptr inbounds [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64 +// CHECK-DAG: [[GEPC]] = getelementptr inbounds nuw [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64 // CHECK-DAG: [[TMP32:%.+]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC]], i32 0, i32 2 // CHECK-DAG: store i64 20, ptr [[TMP32]], // CHECK-DAG: [[TMP33:%.+]] = getelementptr inbounds nuw [[T2]], ptr [[GEPC]], i32 0, i32 3 @@ -131,7 +131,7 @@ int main(int argc, char **argv) { // CHECK-DAG: [[TMP38]] = getelementptr inbounds nuw [[T2]], ptr [[GEPVLA:%[^,]+]], i32 0, i32 0 // CHECK-DAG: store ptr [[VLA]], ptr [[TMP38:%[^,]+]], // CHECK-DAG: [[TMP38]] = getelementptr inbounds nuw [[T2]], ptr [[GEPVLA]], i32 0, i32 1 -// CHECK-DAG: [[GEPVLA]] = getelementptr inbounds [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64 +// CHECK-DAG: [[GEPVLA]] = getelementptr inbounds nuw [2 x [[T2]]], ptr [[RD_IN2]], i64 0, i64 // CHECK-DAG: [[TMP40:%.+]] = mul nuw i64 [[VLA_SIZE]], 2 // CHECK-DAG: [[TMP41:%.+]] = udiv exact i64 [[TMP40]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) // CHECK-DAG: [[TMP42:%.+]] = getelementptr inbounds nuw [[T2]], ptr [[GEPVLA]], i32 0, i32 2 diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp index ae0d007756140..87b4cd2caf18a 100644 --- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -76,7 +76,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], 
i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -91,7 +91,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1 @@ -106,7 +106,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1 @@ -124,7 +124,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]]) // CHECK1-NEXT: store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 @@ -139,7 +139,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..6, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x 
%struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 diff --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp index 3cdc88ba20b77..6eca033eca551 100644 --- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp @@ -83,9 +83,9 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB1:.+]], ptr [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) -// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0 +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % -// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], // CHECK-DAG: [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], @@ -137,10 +137,10 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB4:.+]], ptr [[TMP59]], // CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6 // CHECK-DAG: store i32 1, ptr [[TMP60]], -// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 // CHECK: [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]]) // CHECK: [[TMP63:%.*]] = load i32, ptr [[N]], // CHECK: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]], diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp index 6da28d2d973c9..9e4e51a442742 100644 --- 
a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -76,7 +76,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP2]], align 16 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -91,7 +91,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_1:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 1 @@ -106,7 +106,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_1]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP17]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_2:%.*]] = getelementptr inbounds nuw [3 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_2]], i32 0, i32 1 @@ -124,7 +124,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP25:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 3, ptr [[DOTRD_INPUT_]]) // CHECK1-NEXT: store ptr [[TMP25]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_4:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr 
[[DOTRD_INPUT_GEP_4]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 @@ -139,7 +139,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..6, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_4]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 diff --git a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp index d6e40831484aa..83ae053cfd9bd 100644 --- a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp @@ -80,9 +80,9 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB1:.+]], ptr [[TMP25]], // CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) -// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 0 +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 0 // CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % -// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [100 x %struct.S], ptr [[C]], i64 0, i64 [[LB_ADD_LEN]] // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], // CHECK-DAG: [[TMP28]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 // CHECK-DAG: store ptr [[ARRAYIDX5]], ptr [[TMP28:%[^,]+]], @@ -134,10 +134,10 @@ sum = 0.0; // CHECK-DAG: store ptr @[[RED_COMB4:.+]], ptr [[TMP59]], // CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds nuw %struct.kmp_taskred_input_t, ptr [[DOTRD_INPUT_GEP_8]], i32 0, i32 6 // CHECK-DAG: store i32 1, ptr [[TMP60]], -// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 -// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// 
CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds nuw [4 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 // CHECK: [[TMP62:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 4, ptr [[DOTRD_INPUT_]]) // CHECK: [[TMP63:%.*]] = load i32, ptr [[N]], // CHECK: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp index be499e0b36548..7987c2de7dd8f 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -100,16 +100,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP5]], i64 9 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] @@ -139,7 +139,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -154,19 +154,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 
false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP30]], i64 0 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP31]], i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP34]], i64 9 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 @@ -444,16 +444,16 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP3]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 0 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP7]], i64 9 // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] // CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] @@ -483,7 +483,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] // CHECK1-NEXT: store ptr 
[[_TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 @@ -498,19 +498,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store ptr @.red_comb..4, ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 // CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds nuw [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP32]], i64 0 // CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i64 0 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP36]], i64 9 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 @@ -831,9 +831,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 
[[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] // CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 // CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 // CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] diff --git a/clang/test/SemaCXX/cxx2a-consteval.cpp b/clang/test/SemaCXX/cxx2a-consteval.cpp index 6b0609a26c588..81923617f637e 100644 --- a/clang/test/SemaCXX/cxx2a-consteval.cpp +++ b/clang/test/SemaCXX/cxx2a-consteval.cpp @@ -380,11 +380,9 @@ void test() { { A k = to_lvalue_ref(A()); } // expected-error {{is not a constant expression}} // expected-note@-1 {{is not a constant expression}} expected-note@-1 {{temporary created here}} { A k = to_lvalue_ref(A().ret_a()); } - // expected-error@-1 {{'alloc::A::ret_a' is not a constant expression}} - // expected-note@-2 {{heap-allocated object is not a constant expression}} - // expected-error@-3 {{'alloc::to_lvalue_ref' is not a constant expression}} - // expected-note@-4 {{reference to temporary is not a constant expression}} - // expected-note@-5 {{temporary created here}} + // expected-note@-1 {{reference to temporary is not a constant expression}} + // expected-error@-2 {{'alloc::to_lvalue_ref' is not a constant expression}} + // expected-note@-3 {{temporary created here}} { int k = A().ret_a().ret_i(); } // expected-error@-1 {{'alloc::A::ret_a' is not a constant expression}} // expected-note@-2 {{heap-allocated object is not a constant expression}} @@ -394,19 +392,13 @@ void test() { { int k = rvalue_ref(A()); } { int k = rvalue_ref(std::move(a)); } { int k = const_a_ref(A().ret_a()); } - // expected-error@-1 {{'alloc::A::ret_a' is not a constant expression}} - // expected-note@-2 {{is not a constant expression}} { int k = const_a_ref(to_lvalue_ref(A().ret_a())); } - // expected-error@-1 {{'alloc::A::ret_a' is not a constant expression}} - // expected-note@-2 {{is not a constant expression}} { int k = const_a_ref(to_lvalue_ref(std::move(a))); } { int k = by_value_a(A().ret_a()); } { int k = by_value_a(to_lvalue_ref(static_cast(a))); } { int k = (A().ret_a(), A().ret_i()); }// expected-error {{is not a constant expression}} // expected-note@-1 {{is not a constant expression}} { int k = (const_a_ref(A().ret_a()), A().ret_i()); } - // expected-error@-1 {{'alloc::A::ret_a' is not a constant expression}} - // expected-note@-2 {{is not a constant expression}} } } @@ -1232,4 +1224,27 @@ consteval void immediate() { } +} + +namespace GH105558 { + +consteval int* alloc() { return new int(0); } +consteval void f(int* p) { delete p; } +consteval void g1(int*&& p) { delete p; } +consteval void g2(const int* p) { delete p; } +consteval void g3(int*const& p) { delete p; } +struct X { + int* p; + explicit(false) constexpr X(int* p) : p(p) {} +}; +consteval void g4(X x) { delete x.p; } + +void test() { + f(alloc()); + g1(alloc()); + g2(alloc()); + g3(alloc()); + g4(alloc()); +} + } diff --git a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp index 9ea90a4c3e30f..7d7e808746217 100644 --- a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp +++ b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp @@ -231,3 +231,31 @@ struct type_info { namespace GH93650 { auto func(auto... 
inputArgs) { return typeid(inputArgs...[0]); } } // namespace GH93650 + + +namespace GH105900 { + +template +struct types { + template + static constexpr __SIZE_TYPE__ get_index() { return idx; } + + template + static auto x() -> opts...[get_index()] {} +}; + +template +struct vars { + template + static constexpr __SIZE_TYPE__ get_index() { return idx; } + + template + static auto x() -> decltype(opts...[get_index()]) {return 0;} +}; + +void f() { + types::x<0>(); + vars<0>::x<0>(); +} + +} diff --git a/clang/test/SemaCXX/no_destroy.cpp b/clang/test/SemaCXX/no_destroy.cpp index 5872bcf4b439e..d39bcaeff860a 100644 --- a/clang/test/SemaCXX/no_destroy.cpp +++ b/clang/test/SemaCXX/no_destroy.cpp @@ -1,31 +1,21 @@ -// RUN: %clang_cc1 -DNO_DTORS -DNO_EXCEPTIONS -fno-c++-static-destructors -verify %s -// RUN: %clang_cc1 -DNO_EXCEPTIONS -verify %s -// RUN: %clang_cc1 -DNO_DTORS -fexceptions -fno-c++-static-destructors -verify %s -// RUN: %clang_cc1 -fexceptions -verify %s +// RUN: %clang_cc1 -fc++-static-destructors=none -verify %s +// RUN: %clang_cc1 -fc++-static-destructors=thread-local -verify=expected,thread-local-dtors %s +// RUN: %clang_cc1 -verify=expected,thread-local-dtors,all-dtors %s +// RUN: %clang_cc1 -fexceptions -fc++-static-destructors=none -verify %s +// RUN: %clang_cc1 -fexceptions -fc++-static-destructors=thread-local -verify=expected,thread-local-dtors %s +// RUN: %clang_cc1 -fexceptions -verify=expected,thread-local-dtors,all-dtors %s struct SecretDestructor { -#ifndef NO_DTORS - // expected-note@+2 4 {{private}} -#endif private: ~SecretDestructor(); // expected-note + {{private}} }; -SecretDestructor sd1; -thread_local SecretDestructor sd2; +SecretDestructor sd1; // all-dtors-error{{private}} +thread_local SecretDestructor sd2; // thread-local-dtors-error{{private}} void locals() { - static SecretDestructor sd3; - thread_local SecretDestructor sd4; + static SecretDestructor sd3; // all-dtors-error{{private}} + thread_local SecretDestructor sd4; // thread-local-dtors-error{{private}} } -#ifndef NO_DTORS -// SecretDestructor sd1; // expected-error@-8 {{private}} -// thread_local SecretDestructor sd2; // expected-error@-8 {{private}} -// void locals() { -// static SecretDestructor sd3; // expected-error@-8 {{private}} -// thread_local SecretDestructor sd4; // expected-error@-8 {{private}} -// } -#endif - [[clang::always_destroy]] SecretDestructor sd6; // expected-error{{private}} [[clang::always_destroy]] thread_local SecretDestructor sd7; // expected-error{{private}} diff --git a/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl new file mode 100644 index 0000000000000..db46a8e141495 --- /dev/null +++ b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s +// expected-no-diagnostics + +// Case 1: How many ways can I come up with to represent three float values? 
+struct ThreeFloats1 { + float X, Y, Z; +}; + +struct ThreeFloats2 { + float X[3]; +}; + +struct ThreeFloats3 { + float3 V; +}; + +struct ThreeFloats4 { + float2 V; + float F; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, float[3]), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats1), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats2), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats3), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats4), ""); + +// Case 2: structs and base classes and arrays, oh my! +struct Dog { + int Leg[4]; + bool Tail; + float Fur; +}; + +struct Shiba { + int4 StubbyLegs; + bool CurlyTail; + struct Coating { + float Fur; + } F; +}; + +struct FourLegged { + int FR, FL, BR, BL; +}; + +struct Doggo : FourLegged { + bool WaggyBit; + float Fuzz; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Shiba), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Doggo), ""); + +// Case 3: Arrays of structs inside structs + +struct Cat { + struct Leg { + int L; + } Legs[4]; + struct Other { + bool Tail; + float Furs; + } Bits; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Cat), ""); + +// case 4: Arrays of structs inside arrays of structs. +struct Pets { + Dog Puppers[6]; + Cat Kitties[4]; +}; + +struct Animals { + Dog Puppers[2]; + Cat Kitties[8]; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Pets, Animals), ""); + +// Case 5: Turtles all the way down... + +typedef int Turtle; + +enum Ninja : Turtle { + Leonardo, + Donatello, + Michelangelo, + Raphael, +}; + +enum NotNinja : Turtle { + Fred, + Mikey, +}; + +enum Mammals : uint { + Dog, + Cat, +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Ninja, NotNinja), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Ninja, Mammals), ""); + +// Case 6: Some basic types. +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(int, int32_t), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(uint, uint32_t), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(int, uint), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(int, float), ""); + +// Even though half and float may be the same size we don't want them to be +// layout compatible since they are different types. +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(half, float), ""); + +// Case 6: Empty classes... because they're fun. 
+ +struct NotEmpty { int X; }; +struct Empty {}; +struct AlsoEmpty {}; + +struct DerivedEmpty : Empty {}; + +struct DerivedNotEmpty : Empty { int X; }; +struct DerivedEmptyNotEmptyBase : NotEmpty {}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Empty, AlsoEmpty), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Empty, DerivedEmpty), ""); + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(NotEmpty, DerivedNotEmpty), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(NotEmpty, DerivedEmptyNotEmptyBase), ""); diff --git a/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl new file mode 100644 index 0000000000000..4c96795da7fd0 --- /dev/null +++ b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s + +// Some things that don't work! + +// Case 1: Both types must be complete! +struct Defined { + int X; +}; + + +struct Undefined; // expected-note {{forward declaration of 'Undefined'}} + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Undefined, Defined), ""); // expected-error{{incomplete type 'Undefined' where a complete type is required}} + +// Case 2: No variable length arrays! + +void fn(int X) { + // expected-error@#vla {{variable length arrays are not supported for the current target}} + // expected-error@#vla {{variable length arrays are not supported in '__builtin_hlsl_is_scalarized_layout_compatible'}} + // expected-error@#vla {{static assertion failed due to requirement '__builtin_hlsl_is_scalarized_layout_compatible(int[4], int[X])'}} + // expected-warning@#vla {{variable length arrays in C++ are a Clang extension}} + _Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(int[4], int[X]), ""); // #vla +} + +// Case 3: Make this always fail for unions. +// HLSL doesn't really support unions, and the places where scalarized layouts +// are valid is probably going to be really confusing for unions, so we should +// just make sure unions are never scalarized compatible with anything other +// than themselves. 
+ +union Wah { + int OhNo; + float NotAgain; +}; + +struct OneInt { + int I; +}; + +struct OneFloat { + float F; +}; + +struct HasUnion { + int I; + Wah W; +}; + +struct HasUnionSame { + int I; + Wah W; +}; + +struct HasUnionDifferent { + Wah W; + int I; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Wah, Wah), "Identical types are always compatible"); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Wah, OneInt), "Unions are not compatible with anything else"); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Wah, OneFloat), "Unions are not compatible with anything else"); + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(HasUnion, HasUnionSame), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(HasUnion, HasUnionDifferent), ""); diff --git a/clang/test/SemaTemplate/alias-template-with-lambdas.cpp b/clang/test/SemaTemplate/alias-template-with-lambdas.cpp index ff94031e4d86f..5ec93163e4d18 100644 --- a/clang/test/SemaTemplate/alias-template-with-lambdas.cpp +++ b/clang/test/SemaTemplate/alias-template-with-lambdas.cpp @@ -91,15 +91,84 @@ void bar() { namespace GH82104 { -template int Zero = 0; +template constexpr int Value = sizeof...(D); -template -using T14 = decltype([]() { return Zero; }()); +template +using T14 = decltype([](auto Param) { + return Value + V + (int)sizeof(Param); +}("hello")); template using T15 = T14; static_assert(__is_same(T15, int)); +// FIXME: This still crashes because we can't extract template arguments T and U +// outside of the instantiation context of T16. +#if 0 +template +using T16 = decltype([](auto Param) requires (sizeof(Param) != 1 && sizeof...(U) > 0) { + return Value + sizeof(Param); +}); +static_assert(T16()(42) == 2 + sizeof(42)); +#endif } // namespace GH82104 +namespace GH89853 { + +template +static constexpr auto innocuous = [] { return m; }; + +template > +using broken = decltype(Pred.template operator()<42>()); + +broken<> *boom; + +template { + (void)static_cast(c); + }> +using broken2 = decltype(Pred.template operator()<42>()); + +broken2<> *boom2; + +template { return m; }> +using broken3 = decltype(Pred.template operator()<42>()); + +broken3<> *boom3; + +static constexpr auto non_default = [](True auto) { + (void) static_cast(c); +}; + +template +using broken4 = decltype(Pred.template operator()<42>(Pred)); + +broken4* boom4; + +} // namespace GH89853 + +namespace GH105885 { + +template +using test = decltype([](auto...) { +}()); + +static_assert(__is_same(test<0>, void)); + +} // namespace GH105885 + +namespace GH102760 { + +auto make_tuple = []< class Tag, class... Captures>(Tag, Captures...) { + return []< class _Fun >( _Fun) -> void requires requires { 0; } + {}; +}; + +template < class, class... 
_As > +using Result = decltype(make_tuple(0)(_As{}...)); + +using T = Result; + +} // namespace GH102760 + } // namespace lambda_calls diff --git a/clang/tools/clang-format/clang-format-diff.py b/clang/tools/clang-format/clang-format-diff.py index 9eec0f3c89de3..aebe193eadb34 100755 --- a/clang/tools/clang-format/clang-format-diff.py +++ b/clang/tools/clang-format/clang-format-diff.py @@ -168,7 +168,7 @@ def main(): 'Failed to run "%s" - %s"' % (" ".join(command), e.strerror) ) - stdout, stderr = p.communicate() + stdout, _stderr = p.communicate() if p.returncode != 0: return p.returncode diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp index e451ea6b03622..1b1655f94eaa5 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp @@ -161,6 +161,37 @@ INTERCEPTOR(int, puts, const char *s) { return REAL(puts)(s); } +INTERCEPTOR(ssize_t, read, int fd, void *buf, size_t count) { + ExpectNotRealtime("read"); + return REAL(read)(fd, buf, count); +} + +INTERCEPTOR(ssize_t, write, int fd, const void *buf, size_t count) { + ExpectNotRealtime("write"); + return REAL(write)(fd, buf, count); +} + +INTERCEPTOR(ssize_t, pread, int fd, void *buf, size_t count, off_t offset) { + ExpectNotRealtime("pread"); + return REAL(pread)(fd, buf, count, offset); +} + +INTERCEPTOR(ssize_t, readv, int fd, const struct iovec *iov, int iovcnt) { + ExpectNotRealtime("readv"); + return REAL(readv)(fd, iov, iovcnt); +} + +INTERCEPTOR(ssize_t, pwrite, int fd, const void *buf, size_t count, + off_t offset) { + ExpectNotRealtime("pwrite"); + return REAL(pwrite)(fd, buf, count, offset); +} + +INTERCEPTOR(ssize_t, writev, int fd, const struct iovec *iov, int iovcnt) { + ExpectNotRealtime("writev"); + return REAL(writev)(fd, iov, iovcnt); +} + // Concurrency #if SANITIZER_APPLE #pragma clang diagnostic push @@ -400,6 +431,12 @@ void __rtsan::InitializeInterceptors() { INTERCEPT_FUNCTION(close); INTERCEPT_FUNCTION(fopen); INTERCEPT_FUNCTION(fread); + INTERCEPT_FUNCTION(read); + INTERCEPT_FUNCTION(write); + INTERCEPT_FUNCTION(pread); + INTERCEPT_FUNCTION(readv); + INTERCEPT_FUNCTION(pwrite); + INTERCEPT_FUNCTION(writev); INTERCEPT_FUNCTION(fwrite); INTERCEPT_FUNCTION(fclose); INTERCEPT_FUNCTION(fcntl); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index 5b88cf6461294..f91d694dd1e05 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -18,6 +18,8 @@ #if SANITIZER_APPLE #include #include +#include +#include #endif #if SANITIZER_INTERCEPT_MEMALIGN || SANITIZER_INTERCEPT_PVALLOC @@ -33,6 +35,7 @@ #include #include #include +#include using namespace testing; using namespace rtsan_testing; @@ -71,6 +74,12 @@ TEST(TestRtsanInterceptors, MallocDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } +TEST(TestRtsanInterceptors, CallocDiesWhenRealtime) { + auto Func = []() { EXPECT_NE(nullptr, calloc(2, 4)); }; + ExpectRealtimeDeath(Func, "calloc"); + ExpectNonRealtimeSurvival(Func); +} + TEST(TestRtsanInterceptors, ReallocDiesWhenRealtime) { void *ptr_1 = malloc(1); auto Func = [ptr_1]() { EXPECT_NE(nullptr, realloc(ptr_1, 8)); }; @@ -274,23 +283,43 @@ TEST_F(RtsanFileTest, FopenDiesWhenRealtime) { ExpectNonRealtimeSurvival(func); } -TEST_F(RtsanFileTest, FreadDiesWhenRealtime) { - auto fd = fopen(GetTemporaryFilePath(), "w"); - auto func = [fd]() { +class RtsanOpenedFileTest : 
public RtsanFileTest { +protected: + void SetUp() override { + RtsanFileTest::SetUp(); + file = fopen(GetTemporaryFilePath(), "w"); + ASSERT_THAT(file, Ne(nullptr)); + fd = fileno(file); + ASSERT_THAT(fd, Ne(-1)); + } + + void TearDown() override { + if (file != nullptr) + fclose(file); + RtsanFileTest::TearDown(); + } + + FILE *GetOpenFile() { return file; } + + int GetOpenFd() { return fd; } + +private: + FILE *file = nullptr; + int fd = -1; +}; + +TEST_F(RtsanOpenedFileTest, FreadDiesWhenRealtime) { + auto func = [this]() { char c{}; - fread(&c, 1, 1, fd); + fread(&c, 1, 1, GetOpenFile()); }; ExpectRealtimeDeath(func, "fread"); ExpectNonRealtimeSurvival(func); - if (fd != nullptr) - fclose(fd); } -TEST_F(RtsanFileTest, FwriteDiesWhenRealtime) { - auto fd = fopen(GetTemporaryFilePath(), "w"); - ASSERT_NE(nullptr, fd); - auto message = "Hello, world!"; - auto func = [&]() { fwrite(&message, 1, 4, fd); }; +TEST_F(RtsanOpenedFileTest, FwriteDiesWhenRealtime) { + const char *message = "Hello, world!"; + auto func = [&]() { fwrite(&message, 1, 4, GetOpenFile()); }; ExpectRealtimeDeath(func, "fwrite"); ExpectNonRealtimeSurvival(func); } @@ -309,14 +338,66 @@ TEST(TestRtsanInterceptors, PutsDiesWhenRealtime) { ExpectNonRealtimeSurvival(func); } -TEST_F(RtsanFileTest, FputsDiesWhenRealtime) { - auto fd = fopen(GetTemporaryFilePath(), "w"); - ASSERT_THAT(fd, Ne(nullptr)) << errno; - auto func = [fd]() { fputs("Hello, world!\n", fd); }; +TEST_F(RtsanOpenedFileTest, FputsDiesWhenRealtime) { + auto func = [this]() { fputs("Hello, world!\n", GetOpenFile()); }; ExpectRealtimeDeath(func); ExpectNonRealtimeSurvival(func); - if (fd != nullptr) - fclose(fd); +} + +TEST_F(RtsanOpenedFileTest, ReadDiesWhenRealtime) { + auto Func = [this]() { + char c{}; + read(GetOpenFd(), &c, 1); + }; + ExpectRealtimeDeath(Func, "read"); + ExpectNonRealtimeSurvival(Func); +} + +TEST_F(RtsanOpenedFileTest, WriteDiesWhenRealtime) { + auto Func = [this]() { + char c = 'a'; + write(GetOpenFd(), &c, 1); + }; + ExpectRealtimeDeath(Func, "write"); + ExpectNonRealtimeSurvival(Func); +} + +TEST_F(RtsanOpenedFileTest, PreadDiesWhenRealtime) { + auto Func = [this]() { + char c{}; + pread(GetOpenFd(), &c, 1, 0); + }; + ExpectRealtimeDeath(Func, "pread"); + ExpectNonRealtimeSurvival(Func); +} + +TEST_F(RtsanOpenedFileTest, ReadvDiesWhenRealtime) { + auto Func = [this]() { + char c{}; + iovec iov{&c, 1}; + readv(GetOpenFd(), &iov, 1); + }; + ExpectRealtimeDeath(Func, "readv"); + ExpectNonRealtimeSurvival(Func); +} + +TEST_F(RtsanOpenedFileTest, PwriteDiesWhenRealtime) { + auto Func = [this]() { + char c = 'a'; + pwrite(GetOpenFd(), &c, 1, 0); + }; + ExpectRealtimeDeath(Func, "pwrite"); + ExpectNonRealtimeSurvival(Func); +} + +TEST_F(RtsanOpenedFileTest, WritevDiesWhenRealtime) { + auto Func = [this]() { + char c = 'a'; + iovec iov{&c, 1}; + writev(GetOpenFd(), &iov, 1); + }; + ExpectRealtimeDeath(Func, "writev"); + ExpectNonRealtimeSurvival(Func); } /* diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 27f8697db7838..fba21c3cb6a09 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -72,6 +72,15 @@ namespace { struct CachedBlock { static constexpr u16 CacheIndexMax = UINT16_MAX; static constexpr u16 InvalidEntry = CacheIndexMax; + // * MaxReleasedCachePages default is currently 4 + // - We arrived at this value after noticing that mapping + // in larger memory regions performs better than releasing + // memory and 
forcing a cache hit. According to the data, + // it suggests that beyond 4 pages, the release execution time is + // longer than the map execution time. In this way, the default + // is dependent on the platform. + // TODO: set MaxReleasedCachePages back to 4U + static constexpr uptr MaxReleasedCachePages = 0U; uptr CommitBase = 0; uptr CommitSize = 0; @@ -90,8 +99,9 @@ struct CachedBlock { template class MapAllocatorNoCache { public: void init(UNUSED s32 ReleaseToOsInterval) {} - CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment, - UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) { + CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size, + UNUSED uptr Alignment, UNUSED uptr HeadersSize, + UNUSED uptr &EntryHeaderPos) { return {}; } void store(UNUSED Options Options, UNUSED uptr CommitBase, @@ -121,7 +131,7 @@ template class MapAllocatorNoCache { } }; -static const uptr MaxUnusedCachePages = 4U; +static const uptr MaxUnreleasedCachePages = 4U; template bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, @@ -151,9 +161,11 @@ bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, } } - const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize; - if (useMemoryTagging(Options) && CommitSize > MaxUnusedCacheBytes) { - const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); + const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize; + if (useMemoryTagging(Options) && + CommitSize > MaxUnreleasedCacheBytes) { + const uptr UntaggedPos = + Max(AllocPos, CommitBase + MaxUnreleasedCacheBytes); return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary", MAP_MEMTAG | Flags) && MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos, @@ -334,13 +346,13 @@ class MapAllocatorCache { } } - CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize, - uptr &EntryHeaderPos) EXCLUDES(Mutex) { + CachedBlock retrieve(uptr MaxAllowedFragmentedPages, uptr Size, + uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos) + EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); // 10% of the requested size proved to be the optimal choice for // retrieving cached blocks after testing several options. constexpr u32 FragmentedBytesDivisor = 10; - bool Found = false; CachedBlock Entry; EntryHeaderPos = 0; { @@ -348,47 +360,100 @@ class MapAllocatorCache { CallsToRetrieve++; if (EntriesCount == 0) return {}; - u32 OptimalFitIndex = 0; + u16 RetrievedIndex = CachedBlock::InvalidEntry; uptr MinDiff = UINTPTR_MAX; - for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; + + // Since allocation sizes don't always match cached memory chunk sizes + // we allow some memory to be unused (called fragmented bytes). The + // amount of unused bytes is exactly EntryHeaderPos - CommitBase. + // + // CommitBase CommitBase + CommitSize + // V V + // +---+------------+-----------------+---+ + // | | | | | + // +---+------------+-----------------+---+ + // ^ ^ ^ + // Guard EntryHeaderPos Guard-page-end + // page-begin + // + // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as + // well as the header metadata. If EntryHeaderPos - CommitBase exceeds + // MaxAllowedFragmentedPages * PageSize, the cached memory chunk is + // not considered valid for retrieval. 
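The validity rule spelled out in the comment above reduces to a small amount of arithmetic. The following is a minimal sketch, not part of the patch and not the Scudo implementation: `roundDownTo` and the parameter list are assumptions standing in for Scudo's `roundDown` and the values computed in the loop that follows, and it checks the header-based bound the comment describes (the in-tree check currently bounds `AllocPos`, per the TODO inside the loop).

// Minimal sketch of the retrieval fragmentation bound described above.
// Assumes a power-of-two Alignment; names are illustrative stand-ins.
#include <cstdint>

using uptr = uintptr_t;

static uptr roundDownTo(uptr X, uptr Boundary) { return X & ~(Boundary - 1); }

// Returns true if a cached chunk [CommitBase, CommitBase + CommitSize) can
// serve a request of Size/Alignment/HeadersSize without exceeding the
// MaxAllowedFragmentedPages budget of unused leading bytes.
bool fitsFragmentationBudget(uptr CommitBase, uptr CommitSize, uptr Size,
                             uptr Alignment, uptr HeadersSize,
                             uptr MaxAllowedFragmentedPages, uptr PageSize) {
  const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment);
  const uptr HeaderPos = AllocPos - HeadersSize;
  if (HeaderPos < CommitBase || HeaderPos > CommitBase + CommitSize)
    return false; // request does not fit inside this cached chunk at all
  // Leading bytes between CommitBase and the header are the fragmented bytes.
  return HeaderPos - CommitBase <= MaxAllowedFragmentedPages * PageSize;
}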
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
+        const uptr MaxAllowedFragmentedBytes =
+            MaxAllowedFragmentedPages * PageSize;
         if (HeaderPos > CommitBase + CommitSize)
           continue;
+        // TODO: Remove AllocPos > CommitBase + MaxAllowedFragmentedBytes
+        // and replace with Diff > MaxAllowedFragmentedBytes
         if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+            AllocPos > CommitBase + MaxAllowedFragmentedBytes) {
           continue;
         }
-        Found = true;
-        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
-            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
-          break;
-        }
-        // keep track of the smallest cached block
+
+        const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+        // Keep track of the smallest cached block
         // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
+        if (Diff >= MinDiff)
           continue;
-        OptimalFitIndex = I;
+        MinDiff = Diff;
+        RetrievedIndex = I;
         EntryHeaderPos = HeaderPos;
+
+        // Immediately use a cached block if its size is close enough to the
+        // requested size
+        const uptr OptimalFitThresholdBytes =
+            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
+        if (Diff <= OptimalFitThresholdBytes)
+          break;
       }
-      if (Found) {
-        Entry = Entries[OptimalFitIndex];
-        remove(OptimalFitIndex);
+      if (RetrievedIndex != CachedBlock::InvalidEntry) {
+        Entry = Entries[RetrievedIndex];
+        remove(RetrievedIndex);
         SuccessfulRetrieves++;
       }
     }
+    //   The difference between the retrieved memory chunk and the request
+    //   size is at most MaxAllowedFragmentedPages
+    //
+    //   +- MaxAllowedFragmentedPages * PageSize -+
+    //   +--------------------------+-------------+
+    //   |                          |             |
+    //   +--------------------------+-------------+
+    //   \   Bytes to be released   /             ^
+    //                                            |
+    //                              (may or may not be committed)
+    //
+    //   The maximum number of bytes released to the OS is capped by
+    //   MaxReleasedCachePages
+    //
+    //   TODO: Consider making MaxReleasedCachePages configurable since
+    //   the release to OS API can vary across systems.
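// An illustrative, self-contained sketch of the release cap described in the
// comment above, again with plain integers. The function name, the `sketch`
// namespace, and the constants in main() (4 KiB pages, a budget of 4
// unreleased pages, a cap of 4 released pages) are assumptions for the
// example; the patch itself currently sets MaxReleasedCachePages to 0.
#include <algorithm>
#include <cassert>
#include <cstdint>

namespace sketch {
using u64 = std::uint64_t;

constexpr u64 roundUpTo(u64 X, u64 Boundary) {
  return (X + Boundary - 1) / Boundary * Boundary;
}

// Only the fragmented bytes beyond MaxUnreleasedCachePages * PageSize are
// candidates for release, and never more than MaxReleasedCachePages *
// PageSize of them.
u64 bytesToRelease(u64 FragmentedBytes, u64 PageSize,
                   u64 MaxUnreleasedCachePages, u64 MaxReleasedCachePages) {
  const u64 MaxUnreleasedBytes = MaxUnreleasedCachePages * PageSize;
  if (FragmentedBytes <= MaxUnreleasedBytes)
    return 0; // Fragmentation within the budget stays mapped.
  const u64 MaxReleasedBytes = MaxReleasedCachePages * PageSize;
  return roundUpTo(
      std::min(MaxReleasedBytes, FragmentedBytes - MaxUnreleasedBytes),
      PageSize);
}
} // namespace sketch

int main() {
  const sketch::u64 PageSize = 4096;
  // Ten fragmented pages: six exceed the four-page unreleased budget, but the
  // four-page release cap limits the release to 16 KiB.
  assert(sketch::bytesToRelease(10 * PageSize, PageSize,
                                /*MaxUnreleasedCachePages=*/4,
                                /*MaxReleasedCachePages=*/4) == 4 * PageSize);
  // Three fragmented pages fit the budget, so nothing is released.
  assert(sketch::bytesToRelease(3 * PageSize, PageSize, 4, 4) == 0);
  return 0;
}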
+ if (Entry.Time != 0) { + const uptr FragmentedBytes = + roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase; + const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize; + if (FragmentedBytes > MaxUnreleasedCacheBytes) { + const uptr MaxReleasedCacheBytes = + CachedBlock::MaxReleasedCachePages * PageSize; + uptr BytesToRelease = + roundUp(Min(MaxReleasedCacheBytes, + FragmentedBytes - MaxUnreleasedCacheBytes), + PageSize); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease); + } + } + return Entry; } @@ -659,8 +724,13 @@ MapAllocator::tryAllocateFromCache(const Options &Options, uptr Size, FillContentsMode FillContents) { CachedBlock Entry; uptr EntryHeaderPos; + uptr MaxAllowedFragmentedPages = MaxUnreleasedCachePages; + + if (UNLIKELY(useMemoryTagging(Options))) + MaxAllowedFragmentedPages += CachedBlock::MaxReleasedCachePages; - Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos); + Entry = Cache.retrieve(MaxAllowedFragmentedPages, Size, Alignment, + getHeadersSize(), EntryHeaderPos); if (!Entry.isValid()) return nullptr; diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index e85b6abdb36d2..3638f1c36ddd9 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test { std::unique_ptr Cache = std::make_unique(); const scudo::uptr PageSize = scudo::getPageSizeCached(); - // The current test allocation size is set to the minimum size - // needed for the scudo allocator to fall back to the secondary allocator + // The current test allocation size is set to the maximum + // cache entry size static constexpr scudo::uptr TestAllocSize = CacheConfig::getDefaultMaxEntrySize(); @@ -327,7 +327,7 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) { for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) { scudo::uptr EntryHeaderPos; scudo::CachedBlock Entry = - Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos); + Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos); EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase()); } @@ -336,6 +336,30 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) { MemMap.unmap(); } +TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) { + const scudo::uptr FragmentedPages = + 1 + scudo::CachedBlock::MaxReleasedCachePages; + scudo::uptr EntryHeaderPos; + scudo::CachedBlock Entry; + scudo::MemMapT MemMap = allocate(PageSize + FragmentedPages * PageSize); + Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(), + MemMap.getBase(), MemMap); + + // FragmentedPages > MaxAllowedFragmentedPages so PageSize + // cannot be retrieved from the cache + Entry = Cache->retrieve(/*MaxAllowedFragmentedPages=*/0, PageSize, PageSize, + 0, EntryHeaderPos); + EXPECT_FALSE(Entry.isValid()); + + // FragmentedPages == MaxAllowedFragmentedPages so PageSize + // can be retrieved from the cache + Entry = + Cache->retrieve(FragmentedPages, PageSize, PageSize, 0, EntryHeaderPos); + EXPECT_TRUE(Entry.isValid()); + + MemMap.unmap(); +} + TEST_F(MapAllocatorCacheTest, MemoryLeakTest) { std::vector MemMaps; // Fill the cache above MaxEntriesCount to force an eviction @@ -351,7 +375,7 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) { for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) { scudo::uptr EntryHeaderPos; RetrievedEntries.push_back( - 
Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos)); + Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos)); EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase()); } diff --git a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp index 23f0a02ea4277..a762aee48f7c6 100644 --- a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp @@ -154,7 +154,7 @@ TEST_F(ScudoTimingTest, VerifyMax) { unsigned long long MaxNs = std::strtoull(&end[6], &end, 10); ASSERT_TRUE(end != nullptr); - EXPECT_GT(MaxNs, AvgNs); + EXPECT_GE(MaxNs, AvgNs); } TEST_F(ScudoTimingTest, VerifyMultipleTimerCalls) { diff --git a/compiler-rt/test/asan/TestCases/Linux/allocator_oom_test.cpp b/compiler-rt/test/asan/TestCases/Linux/allocator_oom_test.cpp index b096624a7f95b..f60a6a4ef79e6 100644 --- a/compiler-rt/test/asan/TestCases/Linux/allocator_oom_test.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/allocator_oom_test.cpp @@ -31,7 +31,7 @@ // ASan shadow memory on s390 is too large for this test. // AArch64 bots fail on this test. // TODO(alekseys): Android lit do not run ulimit on device. -// REQUIRES: shadow-scale-3 +// REQUIRES: shell, shadow-scale-3 // UNSUPPORTED: android, target={{(s390|aarch64|powerpc64le).*}} #include diff --git a/compiler-rt/test/asan/TestCases/Posix/deep_call_stack.cpp b/compiler-rt/test/asan/TestCases/Posix/deep_call_stack.cpp index e6e82a4757205..37aa7b11a231a 100644 --- a/compiler-rt/test/asan/TestCases/Posix/deep_call_stack.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/deep_call_stack.cpp @@ -1,4 +1,5 @@ // Check that UAR mode can handle very deep recusrion. +// REQUIRES: shell // RUN: %clangxx_asan -O2 %s -o %t // RUN: ulimit -s 4096 // RUN: %env_asan_opts=detect_stack_use_after_return=1 %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/fuzzer/focus-function.test b/compiler-rt/test/fuzzer/focus-function.test index ec4a03c95a635..64fd5eebb2389 100644 --- a/compiler-rt/test/fuzzer/focus-function.test +++ b/compiler-rt/test/fuzzer/focus-function.test @@ -1,7 +1,8 @@ # Tests -focus_function # # TODO: don't require linux. -# REQUIRES: linux +# Requires full shell support for the `for` loop syntax. +# REQUIRES: shell, linux UNSUPPORTED: target=aarch64{{.*}} RUN: %cpp_compiler %S/OnlySomeBytesTest.cpp -o %t-exe diff --git a/compiler-rt/test/fuzzer/merge-posix.test b/compiler-rt/test/fuzzer/merge-posix.test index 9fece647ca60b..2721668fb9706 100644 --- a/compiler-rt/test/fuzzer/merge-posix.test +++ b/compiler-rt/test/fuzzer/merge-posix.test @@ -1,5 +1,5 @@ +REQUIRES: shell XFAIL: ios -UNSUPPORTED: target={{.*windows.*}} RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest RUN: rm -rf %tmp/T1 %tmp/T2 diff --git a/compiler-rt/test/fuzzer/out-of-process-fuzz.test b/compiler-rt/test/fuzzer/out-of-process-fuzz.test index 4bd866061f1fc..d239bfac1b9cb 100644 --- a/compiler-rt/test/fuzzer/out-of-process-fuzz.test +++ b/compiler-rt/test/fuzzer/out-of-process-fuzz.test @@ -14,8 +14,8 @@ RUN: echo %t # Out-of-process fuzzing with this rig is slow, # we can not wait for the fuzzer to find the faulty input. # Just run for a bit and observe the corpus expansion. 
-RUN: LIBFUZZER_OOP_TARGET="./oop-target > /dev/null 2>&1 " ./oop-fuzzer -max_len=3 OOP_CORPUS -runs=1000 -jobs=4 -CHECK: Running: OOP_CORPUS/ -CHECK: Running: OOP_CORPUS/ -CHECK: Running: OOP_CORPUS/ +RUN: env LIBFUZZER_OOP_TARGET="./oop-target > /dev/null 2>&1 " ./oop-fuzzer -max_len=3 OOP_CORPUS -runs=1000 -jobs=4 +CHECK: Running: {{.*}}OOP_CORPUS/ +CHECK: Running: {{.*}}OOP_CORPUS/ +CHECK: Running: {{.*}}OOP_CORPUS/ RUN: ./oop-target OOP_CORPUS/* 2>&1 | FileCheck %s diff --git a/compiler-rt/test/fuzzer/ulimit.test b/compiler-rt/test/fuzzer/ulimit.test index 223f2ac9bb6e2..e330a97cc07c5 100644 --- a/compiler-rt/test/fuzzer/ulimit.test +++ b/compiler-rt/test/fuzzer/ulimit.test @@ -1,5 +1,4 @@ -# FIXME: Disabled on Windows for now because Windows has no ulimit command. -UNSUPPORTED: target={{.*windows.*}} +REQUIRES: shell RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest RUN: ulimit -s 1000 RUN: not %run %t-SimpleTest diff --git a/compiler-rt/test/hwasan/TestCases/hwasan_symbolize.cpp b/compiler-rt/test/hwasan/TestCases/hwasan_symbolize.cpp index 6e28e19122317..41dd0902b794c 100644 --- a/compiler-rt/test/hwasan/TestCases/hwasan_symbolize.cpp +++ b/compiler-rt/test/hwasan/TestCases/hwasan_symbolize.cpp @@ -1,8 +1,9 @@ -// RUN: %clang_hwasan -Wl,--build-id -g %s -o %t -// RUN: echo '[{"prefix": "'"$(realpath $(dirname %s))"'/", "link": "http://test.invalid/{file}:{line}"}]' > %t.linkify -// RUN: %env_hwasan_opts=symbolize=0 not %run %t 2>&1 | hwasan_symbolize --html --symbols $(dirname %t) --index | FileCheck %s -// RUN: %env_hwasan_opts=symbolize=0 not %run %t 2>&1 | hwasan_symbolize --html --linkify %t.linkify --symbols $(dirname %t) --index | FileCheck --check-prefixes=CHECK,LINKIFY %s -// RUN: %env_hwasan_opts=symbolize=0 not %run %t 2>&1 | hwasan_symbolize --symbols $(dirname %t) --index | FileCheck %s +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_hwasan -Wl,--build-id -g %s -o %t/hwasan_symbolize_test +// RUN: echo '[{"prefix": "'"%S"'/", "link": "http://test.invalid/{file}:{line}"}]' > %t/hwasan_symbolize_test.linkify +// RUN: %env_hwasan_opts=symbolize=0 not %run %t/hwasan_symbolize_test 2>&1 | hwasan_symbolize --html --symbols %t --index | FileCheck %s +// RUN: %env_hwasan_opts=symbolize=0 not %run %t/hwasan_symbolize_test 2>&1 | hwasan_symbolize --html --linkify %t/hwasan_symbolize_test.linkify --symbols %t --index | FileCheck --check-prefixes=CHECK,LINKIFY %s +// RUN: %env_hwasan_opts=symbolize=0 not %run %t/hwasan_symbolize_test 2>&1 | hwasan_symbolize --symbols %t --index | FileCheck %s #include #include diff --git a/compiler-rt/test/hwasan/TestCases/print-memory-usage.c b/compiler-rt/test/hwasan/TestCases/print-memory-usage.c index 2c89d4e70ebc7..13652fbd921b0 100644 --- a/compiler-rt/test/hwasan/TestCases/print-memory-usage.c +++ b/compiler-rt/test/hwasan/TestCases/print-memory-usage.c @@ -1,4 +1,5 @@ // Tests __hwasan_print_memory_usage. +// REQUIRES: shell // RUN: %clang_hwasan %s -o %t // RUN: ulimit -s 1000 // RUN: %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/msan/Linux/reexec_unlimited_stack.cpp b/compiler-rt/test/msan/Linux/reexec_unlimited_stack.cpp index 61492ec34533f..8dee27047470e 100644 --- a/compiler-rt/test/msan/Linux/reexec_unlimited_stack.cpp +++ b/compiler-rt/test/msan/Linux/reexec_unlimited_stack.cpp @@ -1,5 +1,6 @@ // MSAN re-execs on unlimited stacks. We use that to verify ReExec() uses the // right path. 
+// REQUIRES: shell // RUN: %clangxx_msan -O0 %s -o %t && ulimit -s unlimited && %run %t | FileCheck %s #include diff --git a/compiler-rt/test/profile/Linux/counter_promo_for.c b/compiler-rt/test/profile/Linux/counter_promo_for.c index aa77e6084bf85..f59f3e4b34a26 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_for.c +++ b/compiler-rt/test/profile/Linux/counter_promo_for.c @@ -12,7 +12,9 @@ // RUN: %run %t.nopromo.gen // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata > %t.nopromo.dump -// RUN: diff <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata) +// RUN: llvm-profdata show %t.promo.profdata > %t.promo.dump +// RUN: llvm-profdata show %t.nopromo.profdata > %t.nopromo.dump +// RUN: diff %t.promo.dump %t.nopromo.dump int g; __attribute__((noinline)) void bar(int i) { g += i; } diff --git a/compiler-rt/test/profile/Linux/counter_promo_nest.c b/compiler-rt/test/profile/Linux/counter_promo_nest.c index ac32d16d706ba..a893108c96e37 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_nest.c +++ b/compiler-rt/test/profile/Linux/counter_promo_nest.c @@ -10,7 +10,9 @@ // RUN: %run %t.nopromo.gen // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata > %t.nopromo.dump -// RUN: diff <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata) +// RUN: llvm-profdata show %t.promo.profdata > %t.promo.dump +// RUN: llvm-profdata show %t.nopromo.profdata > %t.nopromo.dump +// RUN: diff %t.promo.dump %t.nopromo.dump int g; __attribute__((noinline)) void bar() { g++; diff --git a/compiler-rt/test/profile/Linux/counter_promo_while.c b/compiler-rt/test/profile/Linux/counter_promo_while.c index c6ea3a7282d42..5471571392682 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_while.c +++ b/compiler-rt/test/profile/Linux/counter_promo_while.c @@ -12,7 +12,9 @@ // RUN: %run %t.nopromo.gen // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata > %t.nopromo.dump -// RUN: diff <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata) +// RUN: llvm-profdata show %t.promo.profdata > %t.promo.dump +// RUN: llvm-profdata show %t.nopromo.profdata > %t.nopromo.dump +// RUN: diff %t.promo.dump %t.nopromo.dump int g; __attribute__((noinline)) void bar(int i) { g += i; } __attribute__((noinline)) void foo(int n, int N) { diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c index a918d7b629900..426426d9a05a2 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c @@ -7,13 +7,17 @@ // RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4 // RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite -// RUN: diff <(llvm-profdata show --all-functions --counts %t.normal.profdata) <(llvm-profdata show --all-functions --counts %t.d4.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.normal.profdata > %t.normal.dump +// RUN: llvm-profdata show --all-functions --counts %t.d4.profdata > %t.d4.dump +// RUN: diff %t.normal.dump %t.d4.dump // RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp 
%S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite -// RUN: diff <(llvm-profdata show --all-functions --counts %t.normal.profdata) <(llvm-profdata show --all-functions --counts %t.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.normal.profdata > %t.normal2.dump +// RUN: llvm-profdata show --all-functions --counts %t.profdata > %t.prof.dump +// RUN: diff %t.normal2.dump %t.prof.dump // RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov @@ -23,7 +27,9 @@ // RUN: env LLVM_PROFILE_FILE=%t.cov.profraw %run %t.cov.normal // RUN: llvm-profdata merge -o %t.cov.normal.profdata %t.cov.profraw -// RUN: diff <(llvm-profdata show --all-functions --counts %t.cov.normal.profdata) <(llvm-profdata show --all-functions --counts %t.cov.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.cov.normal.profdata > %t.cov.normal.dump +// RUN: llvm-profdata show --all-functions --counts %t.cov.profdata > %t.cov.dump +// RUN: diff %t.cov.normal.dump %t.cov.dump // Test debug info correlate with online merging. @@ -36,11 +42,15 @@ // RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.profdir/ -// RUN: diff <(llvm-profdata show --all-functions --counts %t.normal.profdata) <(llvm-profdata show --all-functions --counts %t.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.normal.profdata > %t.normal3.dump +// RUN: llvm-profdata show --all-functions --counts %t.profdata > %t.prof3.dump +// RUN: diff %t.normal3.dump %t.prof3.dump // RUN: rm -rf %t.profdir && mkdir %t.profdir // RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.cov.proflite %run %t.cov // RUN: env LLVM_PROFILE_FILE=%t.profdir/%m.cov.proflite %run %t.cov // RUN: llvm-profdata merge -o %t.cov.profdata --debug-info=%t.cov %t.profdir/ -// RUN: diff <(llvm-profdata show --all-functions --counts %t.cov.normal.profdata) <(llvm-profdata show --all-functions --counts %t.cov.profdata) +// RUN: llvm-profdata show --all-functions --counts %t.cov.normal.profdata > %t.cov.normal2.dump +// RUN: llvm-profdata show --all-functions --counts %t.cov.profdata > %t.cov2.dump +// RUN: diff %t.cov.normal2.dump %t.cov2.dump diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp index 2d0e48cd84f3c..4f2a5e65005ed 100644 --- a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr.cpp @@ -23,7 +23,7 @@ // RUN: %env_ubsan_opts=halt_on_error=1 not %run %t nN 2>&1 | FileCheck %s --check-prefix=CHECK-NULL-MEMFUN --strict-whitespace // RUN: %env_ubsan_opts=print_stacktrace=1 %run %t dT 2>&1 | FileCheck %s --check-prefix=CHECK-DYNAMIC --allow-unused-prefixes --check-prefix=CHECK-%os-DYNAMIC --strict-whitespace -// RUN: (echo "vptr_check:S"; echo "vptr_check:T"; echo "vptr_check:U") > %t.supp +// RUN: echo -e "vptr_check:S\nvptr_check:T\nvptr_check:U" > %t.supp // RUN: %env_ubsan_opts=halt_on_error=1:suppressions='"%t.supp"' %run %t mS // RUN: %env_ubsan_opts=halt_on_error=1:suppressions='"%t.supp"' %run %t fS // RUN: %env_ubsan_opts=halt_on_error=1:suppressions='"%t.supp"' %run %t cS diff --git 
a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index e95af629ef32f..f643674f1d5d6 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -161,10 +161,11 @@ def cuf_DataTransferOp : cuf_Op<"data_transfer", []> { let arguments = (ins Arg:$src, Arg:$dst, + Optional:$shape, cuf_DataTransferKindAttr:$transfer_kind); let assemblyFormat = [{ - $src `to` $dst attr-dict `:` type(operands) + $src `to` $dst (`,` $shape^ `:` type($shape) )? attr-dict `:` type($src) `,` type($dst) }]; let hasVerifier = 1; diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h index 3498a329ced30..68e03eab7268b 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRType.h +++ b/flang/include/flang/Optimizer/Dialect/FIRType.h @@ -487,11 +487,18 @@ std::string getTypeAsString(mlir::Type ty, const KindMapping &kindMap, /// target dependent type size inquiries in lowering. It would also not be /// straightforward given the need for a kind map that would need to be /// converted in terms of mlir::DataLayoutEntryKey. + +/// This variant terminates the compilation if an unsupported type is passed. std::pair +getTypeSizeAndAlignmentOrCrash(mlir::Location loc, mlir::Type ty, + const mlir::DataLayout &dl, + const fir::KindMapping &kindMap); + +/// This variant returns std::nullopt if an unsupported type is passed. +std::optional> getTypeSizeAndAlignment(mlir::Location loc, mlir::Type ty, const mlir::DataLayout &dl, const fir::KindMapping &kindMap); - } // namespace fir #endif // FORTRAN_OPTIMIZER_DIALECT_FIRTYPE_H diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h index e051e86431663..6e1979790e3c6 100644 --- a/flang/include/flang/Runtime/numeric.h +++ b/flang/include/flang/Runtime/numeric.h @@ -377,6 +377,8 @@ CppTypeFor RTDECL(SelectedCharKind)( // SELECTED_INT_KIND CppTypeFor RTDECL(SelectedIntKind)( const char *, int, void *, int); +CppTypeFor RTDECL(SelectedIntKindMasked)( + const char *, int, void *, int, int); // SELECTED_LOGICAL_KIND CppTypeFor RTDECL(SelectedLogicalKind)( @@ -385,6 +387,8 @@ CppTypeFor RTDECL(SelectedLogicalKind)( // SELECTED_REAL_KIND CppTypeFor RTDECL(SelectedRealKind)( const char *, int, void *, int, void *, int, void *, int); +CppTypeFor RTDECL(SelectedRealKindMasked)( + const char *, int, void *, int, void *, int, void *, int, int); // SPACING CppTypeFor RTDECL(Spacing4)( diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 5c86bd947ce73..c9991ff18d175 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -323,6 +323,7 @@ bool CodeGenAction::beginSourceFileAction() { // run the default passes. mlir::PassManager pm((*mlirModule)->getName(), mlir::OpPassManager::Nesting::Implicit); + (void)mlir::applyPassManagerCLOptions(pm); // Add OpenMP-related passes // WARNING: These passes must be run immediately after the lowering to ensure // that the FIR is correct with respect to OpenMP operations/attributes. diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index ccbb481f472d8..24cd6b22b8925 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -4272,18 +4272,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { base = convertOp.getValue(); // Special case if the rhs is a constant. 
if (matchPattern(base.getDefiningOp(), mlir::m_Constant())) { - builder.create(loc, base, lhsVal, - transferKindAttr); + builder.create( + loc, base, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr); } else { auto associate = hlfir::genAssociateExpr( loc, builder, rhs, rhs.getType(), ".cuf_host_tmp"); builder.create(loc, associate.getBase(), lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); builder.create(loc, associate); } } else { - builder.create(loc, rhsVal, lhsVal, - transferKindAttr); + builder.create( + loc, rhsVal, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr); } return; } @@ -4293,6 +4294,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto transferKindAttr = cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceHost); builder.create(loc, rhsVal, lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); return; } @@ -4303,6 +4305,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto transferKindAttr = cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceDevice); builder.create(loc, rhsVal, lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); return; } @@ -4346,8 +4349,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { addSymbol(sym, hlfir::translateToExtendedValue(loc, builder, temp).first, /*forced=*/true); - builder.create(loc, addr, temp, - transferKindAttr); + builder.create( + loc, addr, temp, /*shape=*/mlir::Value{}, transferKindAttr); ++nbDeviceResidentObject; } } diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp index 25141102a8c43..7bc730bff76fe 100644 --- a/flang/lib/Optimizer/CodeGen/Target.cpp +++ b/flang/lib/Optimizer/CodeGen/Target.cpp @@ -431,8 +431,8 @@ struct TargetX86_64 : public GenericTarget { return byteOffset; } mlir::Type compType = component.second; - auto [compSize, compAlign] = - fir::getTypeSizeAndAlignment(loc, compType, getDataLayout(), kindMap); + auto [compSize, compAlign] = fir::getTypeSizeAndAlignmentOrCrash( + loc, compType, getDataLayout(), kindMap); byteOffset = llvm::alignTo(byteOffset, compAlign); ArgClass LoComp, HiComp; classify(loc, compType, byteOffset, LoComp, HiComp); @@ -452,8 +452,8 @@ struct TargetX86_64 : public GenericTarget { ArgClass &Hi) const { mlir::Type eleTy = seqTy.getEleTy(); const std::uint64_t arraySize = seqTy.getConstantArraySize(); - auto [eleSize, eleAlign] = - fir::getTypeSizeAndAlignment(loc, eleTy, getDataLayout(), kindMap); + auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash( + loc, eleTy, getDataLayout(), kindMap); std::uint64_t eleStorageSize = llvm::alignTo(eleSize, eleAlign); for (std::uint64_t i = 0; i < arraySize; ++i) { byteOffset = llvm::alignTo(byteOffset, eleAlign); @@ -641,7 +641,7 @@ struct TargetX86_64 : public GenericTarget { mlir::Type ty) const { CodeGenSpecifics::Marshalling marshal; auto sizeAndAlign = - fir::getTypeSizeAndAlignment(loc, ty, getDataLayout(), kindMap); + fir::getTypeSizeAndAlignmentOrCrash(loc, ty, getDataLayout(), kindMap); // The stack is always 8 byte aligned (note 14 in 3.2.3). 
unsigned short align = std::max(sizeAndAlign.second, static_cast(8)); diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index f7b36b208a7de..3b4ad95cafe6b 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -99,6 +99,11 @@ llvm::LogicalResult cuf::AllocateOp::verify() { llvm::LogicalResult cuf::DataTransferOp::verify() { mlir::Type srcTy = getSrc().getType(); mlir::Type dstTy = getDst().getType(); + if (getShape()) { + if (!fir::isa_ref_type(srcTy) || !fir::isa_ref_type(dstTy)) + return emitOpError() + << "shape can only be specified on data transfer with references"; + } if ((fir::isa_ref_type(srcTy) && fir::isa_ref_type(dstTy)) || (fir::isa_box_type(srcTy) && fir::isa_box_type(dstTy)) || (fir::isa_ref_type(srcTy) && fir::isa_box_type(dstTy)) || diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index dbccacfa8be26..c1debf28d0033 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -1393,43 +1393,50 @@ void FIROpsDialect::registerTypes() { OpenACCPointerLikeModel>(*getContext()); } -std::pair +std::optional> fir::getTypeSizeAndAlignment(mlir::Location loc, mlir::Type ty, const mlir::DataLayout &dl, const fir::KindMapping &kindMap) { if (mlir::isa(ty)) { llvm::TypeSize size = dl.getTypeSize(ty); unsigned short alignment = dl.getTypeABIAlignment(ty); - return {size, alignment}; + return std::pair{size, alignment}; } if (auto firCmplx = mlir::dyn_cast(ty)) { - auto [floatSize, floatAlign] = + auto result = getTypeSizeAndAlignment(loc, firCmplx.getEleType(kindMap), dl, kindMap); - return {llvm::alignTo(floatSize, floatAlign) + floatSize, floatAlign}; + if (!result) + return result; + auto [floatSize, floatAlign] = *result; + return std::pair{llvm::alignTo(floatSize, floatAlign) + floatSize, + floatAlign}; } if (auto real = mlir::dyn_cast(ty)) return getTypeSizeAndAlignment(loc, real.getFloatType(kindMap), dl, kindMap); if (auto seqTy = mlir::dyn_cast(ty)) { - auto [eleSize, eleAlign] = - getTypeSizeAndAlignment(loc, seqTy.getEleTy(), dl, kindMap); - + auto result = getTypeSizeAndAlignment(loc, seqTy.getEleTy(), dl, kindMap); + if (!result) + return result; + auto [eleSize, eleAlign] = *result; std::uint64_t size = llvm::alignTo(eleSize, eleAlign) * seqTy.getConstantArraySize(); - return {size, eleAlign}; + return std::pair{size, eleAlign}; } if (auto recTy = mlir::dyn_cast(ty)) { std::uint64_t size = 0; unsigned short align = 1; for (auto component : recTy.getTypeList()) { - auto [compSize, compAlign] = - getTypeSizeAndAlignment(loc, component.second, dl, kindMap); + auto result = getTypeSizeAndAlignment(loc, component.second, dl, kindMap); + if (!result) + return result; + auto [compSize, compAlign] = *result; size = llvm::alignTo(size, compAlign) + llvm::alignTo(compSize, compAlign); align = std::max(align, compAlign); } - return {size, align}; + return std::pair{size, align}; } if (auto logical = mlir::dyn_cast(ty)) { mlir::Type intTy = mlir::IntegerType::get( @@ -1440,7 +1447,24 @@ fir::getTypeSizeAndAlignment(mlir::Location loc, mlir::Type ty, mlir::Type intTy = mlir::IntegerType::get( character.getContext(), kindMap.getCharacterBitsize(character.getFKind())); - return getTypeSizeAndAlignment(loc, intTy, dl, kindMap); + auto result = getTypeSizeAndAlignment(loc, intTy, dl, kindMap); + if (!result) + return result; + auto [compSize, compAlign] = *result; + if (character.hasConstantLen()) + 
compSize *= character.getLen(); + return std::pair{compSize, compAlign}; } - TODO(loc, "computing size of a component"); + return std::nullopt; } + +std::pair +fir::getTypeSizeAndAlignmentOrCrash(mlir::Location loc, mlir::Type ty, + const mlir::DataLayout &dl, + const fir::KindMapping &kindMap) { + std::optional> result = + getTypeSizeAndAlignment(loc, ty, dl, kindMap); + if (result) + return *result; + TODO(loc, "computing size of a component"); +} \ No newline at end of file diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index 30fc4185575e6..46fc40b714aac 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -65,20 +65,15 @@ class AddDebugInfoPass : public fir::impl::AddDebugInfoBase { void handleGlobalOp(fir::GlobalOp glocalOp, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, + fir::DebugTypeGenerator &typeGen, mlir::SymbolTable *symbolTable, fir::cg::XDeclareOp declOp); void handleFuncOp(mlir::func::FuncOp funcOp, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DICompileUnitAttr cuAttr, + fir::DebugTypeGenerator &typeGen, mlir::SymbolTable *symbolTable); }; -static uint32_t getLineFromLoc(mlir::Location loc) { - uint32_t line = 1; - if (auto fileLoc = mlir::dyn_cast(loc)) - line = fileLoc.getLine(); - return line; -} - bool debugInfoIsAlreadySet(mlir::Location loc) { if (mlir::isa(loc)) { if (loc->findInstanceOf>()) @@ -103,7 +98,7 @@ void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp, return; // If this DeclareOp actually represents a global then treat it as such. if (auto global = symbolTable->lookup(declOp.getUniqName())) { - handleGlobalOp(global, fileAttr, scopeAttr, symbolTable, declOp); + handleGlobalOp(global, fileAttr, scopeAttr, typeGen, symbolTable, declOp); return; } @@ -160,19 +155,24 @@ mlir::LLVM::DIModuleAttr AddDebugInfoPass::getOrCreateModuleAttr( void AddDebugInfoPass::handleGlobalOp(fir::GlobalOp globalOp, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, + fir::DebugTypeGenerator &typeGen, mlir::SymbolTable *symbolTable, fir::cg::XDeclareOp declOp) { if (debugInfoIsAlreadySet(globalOp.getLoc())) return; - mlir::ModuleOp module = getOperation(); mlir::MLIRContext *context = &getContext(); - fir::DebugTypeGenerator typeGen(module); mlir::OpBuilder builder(context); std::pair result = fir::NameUniquer::deconstruct(globalOp.getSymName()); if (result.first != fir::NameUniquer::NameKind::VARIABLE) return; + // Discard entries that describe a derived type. Usually start with '.c.', + // '.dt.' or '.n.'. It would be better if result of the deconstruct had a flag + // for such values so that we dont have to look at string values. 
+ if (!result.second.name.empty() && result.second.name[0] == '.') + return; + unsigned line = getLineFromLoc(globalOp.getLoc()); // DWARF5 says following about the fortran modules: @@ -214,6 +214,7 @@ void AddDebugInfoPass::handleGlobalOp(fir::GlobalOp globalOp, void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DICompileUnitAttr cuAttr, + fir::DebugTypeGenerator &typeGen, mlir::SymbolTable *symbolTable) { mlir::Location l = funcOp->getLoc(); // If fused location has already been created then nothing to do @@ -221,7 +222,6 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, if (debugInfoIsAlreadySet(l)) return; - mlir::ModuleOp module = getOperation(); mlir::MLIRContext *context = &getContext(); mlir::OpBuilder builder(context); llvm::StringRef fileName(fileAttr.getName()); @@ -245,7 +245,6 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, funcName = mlir::StringAttr::get(context, result.second.name); llvm::SmallVector types; - fir::DebugTypeGenerator typeGen(module); for (auto resTy : funcOp.getResultTypes()) { auto tyAttr = typeGen.convertType(resTy, fileAttr, cuAttr, /*declOp=*/nullptr); @@ -285,7 +284,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, if (auto func = symbolTable->lookup(sym.getLeafReference())) { // Make sure that parent is processed. - handleFuncOp(func, fileAttr, cuAttr, symbolTable); + handleFuncOp(func, fileAttr, cuAttr, typeGen, symbolTable); if (auto fusedLoc = mlir::dyn_cast_if_present(func.getLoc())) { if (auto spAttr = @@ -302,7 +301,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, auto spAttr = mlir::LLVM::DISubprogramAttr::get( context, id, compilationUnit, Scope, funcName, fullName, funcFileAttr, - line, line, subprogramFlags, subTypeAttr); + line, line, subprogramFlags, subTypeAttr, /*retainedNodes=*/{}); funcOp->setLoc(builder.getFusedLoc({funcOp->getLoc()}, spAttr)); // Don't process variables if user asked for line tables only. @@ -320,6 +319,14 @@ void AddDebugInfoPass::runOnOperation() { mlir::SymbolTable symbolTable(module); llvm::StringRef fileName; std::string filePath; + std::optional dl = + fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/true); + if (!dl) { + mlir::emitError(module.getLoc(), "Missing data layout attribute in module"); + signalPassFailure(); + return; + } + fir::DebugTypeGenerator typeGen(module, &symbolTable, *dl); // We need 2 type of file paths here. // 1. Name of the file as was presented to compiler. This can be absolute // or relative to 2. @@ -354,13 +361,13 @@ void AddDebugInfoPass::runOnOperation() { isOptimized, debugLevel); module.walk([&](mlir::func::FuncOp funcOp) { - handleFuncOp(funcOp, fileAttr, cuAttr, &symbolTable); + handleFuncOp(funcOp, fileAttr, cuAttr, typeGen, &symbolTable); }); // Process any global which was not processed through DeclareOp. if (debugLevel == mlir::LLVM::DIEmissionKind::Full) { // Process 'GlobalOp' only if full debug info is requested. 
for (auto globalOp : module.getOps()) - handleGlobalOp(globalOp, fileAttr, cuAttr, &symbolTable, + handleGlobalOp(globalOp, fileAttr, cuAttr, typeGen, &symbolTable, /*declOp=*/nullptr); } } diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 860c16c9a13ce..54f2a12d80008 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -15,7 +15,7 @@ #include "DebugTypeGenerator.h" #include "flang/Optimizer/CodeGen/DescriptorModel.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" -#include "flang/Optimizer/Support/DataLayout.h" +#include "flang/Optimizer/Support/InternalNames.h" #include "mlir/Pass/Pass.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -44,17 +44,13 @@ std::uint64_t getComponentOffset<0>(const mlir::DataLayout &dl, return 0; } -DebugTypeGenerator::DebugTypeGenerator(mlir::ModuleOp m) - : module(m), kindMapping(getKindMapping(m)) { +DebugTypeGenerator::DebugTypeGenerator(mlir::ModuleOp m, + mlir::SymbolTable *symbolTable_, + const mlir::DataLayout &dl) + : module(m), symbolTable(symbolTable_), dataLayout{&dl}, + kindMapping(getKindMapping(m)) { LLVM_DEBUG(llvm::dbgs() << "DITypeAttr generator\n"); - std::optional dl = - fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/true); - if (!dl) { - mlir::emitError(module.getLoc(), "Missing data layout attribute in module"); - return; - } - mlir::MLIRContext *context = module.getContext(); // The debug information requires the offset of certain fields in the @@ -62,10 +58,12 @@ DebugTypeGenerator::DebugTypeGenerator(mlir::ModuleOp m) mlir::Type llvmDimsType = getDescFieldTypeModel()(context); mlir::Type llvmPtrType = getDescFieldTypeModel()(context); mlir::Type llvmLenType = getDescFieldTypeModel()(context); - dimsOffset = getComponentOffset(*dl, context, llvmDimsType); - dimsSize = dl->getTypeSize(llvmDimsType); - ptrSize = dl->getTypeSize(llvmPtrType); - lenOffset = getComponentOffset(*dl, context, llvmLenType); + dimsOffset = + getComponentOffset(*dataLayout, context, llvmDimsType); + dimsSize = dataLayout->getTypeSize(llvmDimsType); + ptrSize = dataLayout->getTypeSize(llvmPtrType); + lenOffset = + getComponentOffset(*dataLayout, context, llvmLenType); } static mlir::LLVM::DITypeAttr genBasicType(mlir::MLIRContext *context, @@ -154,6 +152,49 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType( dataLocation, /*rank=*/nullptr, allocated, associated); } +mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( + fir::RecordType Ty, mlir::LLVM::DIFileAttr fileAttr, + mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { + mlir::MLIRContext *context = module.getContext(); + auto result = fir::NameUniquer::deconstruct(Ty.getName()); + if (result.first != fir::NameUniquer::NameKind::DERIVED_TYPE) + return genPlaceholderType(context); + + fir::TypeInfoOp tiOp = symbolTable->lookup(Ty.getName()); + unsigned line = (tiOp) ? getLineFromLoc(tiOp.getLoc()) : 1; + + llvm::SmallVector elements; + std::uint64_t offset = 0; + for (auto [fieldName, fieldTy] : Ty.getTypeList()) { + auto result = fir::getTypeSizeAndAlignment(module.getLoc(), fieldTy, + *dataLayout, kindMapping); + // If we get a type whose size we can't determine, we will break the loop + // and generate the derived type with whatever components we have + // assembled thus far. 
+ if (!result) + break; + auto [byteSize, byteAlign] = *result; + // FIXME: Handle non defaults array bound in derived types + mlir::LLVM::DITypeAttr elemTy = + convertType(fieldTy, fileAttr, scope, /*declOp=*/nullptr); + offset = llvm::alignTo(offset, byteAlign); + mlir::LLVM::DIDerivedTypeAttr tyAttr = mlir::LLVM::DIDerivedTypeAttr::get( + context, llvm::dwarf::DW_TAG_member, + mlir::StringAttr::get(context, fieldName), elemTy, byteSize * 8, + byteAlign * 8, offset * 8, /*optional
=*/std::nullopt, + /*extra data=*/nullptr); + elements.push_back(tyAttr); + offset += llvm::alignTo(byteSize, byteAlign); + } + + return mlir::LLVM::DICompositeTypeAttr::get( + context, llvm::dwarf::DW_TAG_structure_type, /*recursive_id=*/{}, + mlir::StringAttr::get(context, result.second.name), fileAttr, line, scope, + /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero, offset * 8, + /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr, + /*allocated=*/nullptr, /*associated=*/nullptr); +} + mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { @@ -312,6 +353,8 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, } else if (auto charTy = mlir::dyn_cast_or_null(Ty)) { return convertCharacterType(charTy, fileAttr, scope, declOp, /*hasDescriptor=*/false); + } else if (auto recTy = mlir::dyn_cast_or_null(Ty)) { + return convertRecordType(recTy, fileAttr, scope, declOp); } else if (auto boxTy = mlir::dyn_cast_or_null(Ty)) { auto elTy = boxTy.getElementType(); if (auto seqTy = mlir::dyn_cast_or_null(elTy)) diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h index 5ab6ca5e9f880..e3220f18958df 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h @@ -17,6 +17,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/Support/DataLayout.h" #include "llvm/Support/Debug.h" namespace fir { @@ -24,7 +25,8 @@ namespace fir { /// This converts FIR/mlir type to DITypeAttr. 
class DebugTypeGenerator { public: - DebugTypeGenerator(mlir::ModuleOp module); + DebugTypeGenerator(mlir::ModuleOp module, mlir::SymbolTable *symbolTable, + const mlir::DataLayout &dl); mlir::LLVM::DITypeAttr convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, @@ -32,6 +34,10 @@ class DebugTypeGenerator { fir::cg::XDeclareOp declOp); private: + mlir::LLVM::DITypeAttr convertRecordType(fir::RecordType Ty, + mlir::LLVM::DIFileAttr fileAttr, + mlir::LLVM::DIScopeAttr scope, + fir::cg::XDeclareOp declOp); mlir::LLVM::DITypeAttr convertSequenceType(fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, @@ -59,6 +65,8 @@ class DebugTypeGenerator { bool genAssociated); mlir::ModuleOp module; + mlir::SymbolTable *symbolTable; + const mlir::DataLayout *dataLayout; KindMapping kindMapping; std::uint64_t dimsSize; std::uint64_t dimsOffset; @@ -68,4 +76,11 @@ class DebugTypeGenerator { } // namespace fir +static uint32_t getLineFromLoc(mlir::Location loc) { + uint32_t line = 1; + if (auto fileLoc = mlir::dyn_cast(loc)) + line = fileLoc.getLine(); + return line; +} + #endif // FORTRAN_OPTIMIZER_TRANSFORMS_DEBUGTYPEGENERATOR_H diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp index 38cdc2b1388d4..51dc48f0fcb12 100644 --- a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp +++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp @@ -266,7 +266,7 @@ void LoopVersioningPass::runOnOperation() { if (mlir::isa(elementType) || mlir::isa(elementType) || mlir::isa(elementType)) { - auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignment( + auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash( arg.getLoc(), elementType, *dl, kindMap); typeSize = llvm::alignTo(eleSize, eleAlign); } diff --git a/flang/lib/Parser/io-parsers.cpp b/flang/lib/Parser/io-parsers.cpp index ca0dbedc8da42..25b09efd40c52 100644 --- a/flang/lib/Parser/io-parsers.cpp +++ b/flang/lib/Parser/io-parsers.cpp @@ -27,7 +27,8 @@ TYPE_PARSER(construct(variable / lookAhead(space / ",);\n"_ch)) || construct(fileUnitNumber) || construct(star)) // R1202 file-unit-number -> scalar-int-expr -TYPE_PARSER(construct(scalarIntExpr / !"="_tok)) +TYPE_PARSER(construct( + scalarIntExpr / (lookAhead(space >> ",)"_ch) || atEndOfStmt))) // R1204 open-stmt -> OPEN ( connect-spec-list ) TYPE_CONTEXT_PARSER("OPEN statement"_en_US, diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index c01d512b4653d..804ada7d11e02 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -207,11 +207,13 @@ void Prescanner::Statement() { toks.Put(id, GetProvenance(at_)); if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) { auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; - disableSourceContinuation_ = - newLineClass.kind != LineClassification::Kind::Source; if (newLineClass.kind == LineClassification::Kind::CompilerDirective) { directiveSentinel_ = newLineClass.sentinel; + disableSourceContinuation_ = false; + } else { + disableSourceContinuation_ = + newLineClass.kind != LineClassification::Kind::Source; } } } @@ -1114,39 +1116,33 @@ bool Prescanner::SkipCommentLine(bool afterAmpersand) { SkipToEndOfLine(); omitNewline_ = true; } - return false; - } - auto lineClass{ClassifyLine(nextLine_)}; - if (lineClass.kind == LineClassification::Kind::Comment) { - NextLine(); - return true; } else if (inPreprocessorDirective_) { - return false; - } else if (afterAmpersand && - (lineClass.kind == - 
LineClassification::Kind::ConditionalCompilationDirective || - lineClass.kind == LineClassification::Kind::DefinitionDirective || - lineClass.kind == LineClassification::Kind::PreprocessorDirective || - lineClass.kind == LineClassification::Kind::IncludeDirective || - lineClass.kind == LineClassification::Kind::IncludeLine)) { - SkipToEndOfLine(); - omitNewline_ = true; - skipLeadingAmpersand_ = true; - return false; - } else if (lineClass.kind == - LineClassification::Kind::ConditionalCompilationDirective || - lineClass.kind == LineClassification::Kind::PreprocessorDirective) { - // Allow conditional compilation directives (e.g., #ifdef) to affect - // continuation lines. - // Allow other preprocessor directives, too, except #include - // (when it does not follow '&'), #define, and #undef (because - // they cannot be allowed to affect preceding text on a - // continued line). - preprocessor_.Directive(TokenizePreprocessorDirective(), *this); - return true; } else { - return false; + auto lineClass{ClassifyLine(nextLine_)}; + if (lineClass.kind == LineClassification::Kind::Comment) { + NextLine(); + return true; + } else if (lineClass.kind == + LineClassification::Kind::ConditionalCompilationDirective || + lineClass.kind == LineClassification::Kind::PreprocessorDirective) { + // Allow conditional compilation directives (e.g., #ifdef) to affect + // continuation lines. + // Allow other preprocessor directives, too, except #include + // (when it does not follow '&'), #define, and #undef (because + // they cannot be allowed to affect preceding text on a + // continued line). + preprocessor_.Directive(TokenizePreprocessorDirective(), *this); + return true; + } else if (afterAmpersand && + (lineClass.kind == LineClassification::Kind::DefinitionDirective || + lineClass.kind == LineClassification::Kind::IncludeDirective || + lineClass.kind == LineClassification::Kind::IncludeLine)) { + SkipToEndOfLine(); + omitNewline_ = true; + skipLeadingAmpersand_ = true; + } } + return false; } const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 4708d51d3af4d..c7ec873365564 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -56,6 +56,10 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, "%VAL argument must be a scalar numeric or logical expression"_err_en_US); } if (const auto *expr{arg.UnwrapExpr()}) { + if (const Symbol * base{GetFirstSymbol(*expr)}; + base && IsFunctionResult(*base)) { + context.NoteDefinedSymbol(*base); + } if (IsBOZLiteral(*expr)) { messages.Say("BOZ argument requires an explicit interface"_err_en_US); } else if (evaluate::IsNullPointer(*expr)) { @@ -79,10 +83,6 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, messages.Say( "VOLATILE argument requires an explicit interface"_err_en_US); } - if (const Symbol & base{named->GetFirstSymbol()}; - IsFunctionResult(base)) { - context.NoteDefinedSymbol(base); - } } else if (auto argChars{characteristics::DummyArgument::FromActual( "actual argument", *expr, context.foldingContext(), /*forImplicitInterface=*/true)}) { diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index de3fa8794caed..734c34276b13b 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -256,6 +256,9 @@ static bool IsBlockData(const Symbol &symbol) { } void CheckHelper::Check(const 
Symbol &symbol) { + if (symbol.has()) { + return; + } if (symbol.name().size() > common::maxNameLen && &symbol == &symbol.GetUltimate()) { if (context_.ShouldWarn(common::LanguageFeature::LongNames)) { diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index c0478fd439007..ec8f854f64d10 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1797,6 +1797,9 @@ void AttrsVisitor::SetBindNameOn(Symbol &symbol) { } auto last{label->find_last_not_of(" ")}; label = label->substr(first, last - first + 1); + } else if (symbol.GetIsExplicitBindName()) { + // don't try to override explicit binding name with default + return; } else if (ClassifyProcedure(symbol) == ProcedureDefinitionClass::Internal) { // BIND(C) does not give an implicit binding label to internal procedures. return; diff --git a/flang/runtime/external-unit.cpp b/flang/runtime/external-unit.cpp index 8009151a8a370..d17a92622f844 100644 --- a/flang/runtime/external-unit.cpp +++ b/flang/runtime/external-unit.cpp @@ -65,9 +65,13 @@ ExternalFileUnit *ExternalFileUnit::LookUpOrCreateAnonymous(int unit, bool exists{false}; ExternalFileUnit *result{GetUnitMap().LookUpOrCreate(unit, handler, exists)}; if (result && !exists) { + common::optional action; + if (dir == Direction::Output) { + action = Action::ReadWrite; + } if (!result->OpenAnonymousUnit( dir == Direction::Input ? OpenStatus::Unknown : OpenStatus::Replace, - Action::ReadWrite, Position::Rewind, Convert::Unknown, handler)) { + action, Position::Rewind, Convert::Unknown, handler)) { // fort.N isn't a writable file if (ExternalFileUnit * closed{LookUpForClose(result->unitNumber())}) { closed->DestroyClosed(); diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp index 40bacf07157a2..b5e0851a16cd1 100644 --- a/flang/runtime/numeric.cpp +++ b/flang/runtime/numeric.cpp @@ -95,20 +95,22 @@ template inline RT_API_ATTRS T Scale(T x, std::int64_t p) { } // SELECTED_INT_KIND (16.9.169) -template -inline RT_API_ATTRS CppTypeFor SelectedIntKind(T x) { - if (x <= 2) { +template +inline RT_API_ATTRS CppTypeFor SelectedIntKind( + X x, M mask) { +#if !defined __SIZEOF_INT128__ || defined FLANG_RUNTIME_NO_INTEGER_16 + mask &= ~(1 << 16); +#endif + if (x <= 2 && (mask & (1 << 1))) { return 1; - } else if (x <= 4) { + } else if (x <= 4 && (mask & (1 << 2))) { return 2; - } else if (x <= 9) { + } else if (x <= 9 && (mask & (1 << 4))) { return 4; - } else if (x <= 18) { + } else if (x <= 18 && (mask & (1 << 8))) { return 8; -#if defined __SIZEOF_INT128__ && !defined FLANG_RUNTIME_NO_INTEGER_16 - } else if (x <= 38) { + } else if (x <= 38 && (mask & (1 << 16))) { return 16; -#endif } return -1; } @@ -130,60 +132,52 @@ inline RT_API_ATTRS CppTypeFor SelectedLogicalKind( } // SELECTED_REAL_KIND (16.9.170) -template +template inline RT_API_ATTRS CppTypeFor SelectedRealKind( - P p, R r, D d) { + P p, R r, D d, M mask) { if (d != 2) { return -5; } - -#ifndef FLANG_RUNTIME_NO_REAL_2 - constexpr bool hasReal2{true}; -#else - constexpr bool hasReal2{false}; +#ifdef FLANG_RUNTIME_NO_REAL_2 + mask &= ~(1 << 2); #endif -#ifndef FLANG_RUNTIME_NO_REAL_3 - constexpr bool hasReal3{true}; -#else - constexpr bool hasReal3{false}; +#ifdef FLANG_RUNTIME_NO_REAL_3 + mask &= ~(1 << 3); #endif -#if defined LDBL_MANT_DIG == 64 && !defined FLANG_RUNTIME_NO_REAL_10 - constexpr bool hasReal10{true}; -#else - constexpr bool hasReal10{false}; +#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_10 + mask &= ~(1 << 10); #endif -#if 
(LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && \ - !defined FLANG_RUNTIME_NO_REAL_16 - constexpr bool hasReal16{true}; -#else - constexpr bool hasReal16{false}; +#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_16 + mask &= ~(1 << 16); #endif int error{0}; int kind{0}; - if (hasReal2 && p <= 3) { + if (p <= 3 && (mask & (1 << 2))) { kind = 2; - } else if (p <= 6) { + } else if (p <= 6 && (mask & (1 << 4))) { kind = 4; - } else if (p <= 15) { + } else if (p <= 15 && (mask & (1 << 8))) { kind = 8; - } else if (hasReal10 && p <= 18) { + } else if (p <= 18 && (mask & (1 << 10))) { kind = 10; - } else if (hasReal16 && p <= 33) { + } else if (p <= 33 && (mask & (1 << 16))) { kind = 16; } else { error -= 1; } - if (r <= 4) { - kind = kind < 2 ? (hasReal2 ? 2 : 4) : kind; - } else if (r <= 37) { - kind = kind < 3 ? (hasReal3 && p != 3 ? 3 : 4) : kind; - } else if (r <= 307) { + if (r <= 4 && (mask & (1 << 2))) { + kind = kind < 2 ? 2 : kind; + } else if (r <= 37 && p != 3 && (mask & (1 << 3))) { + kind = kind < 3 ? 3 : kind; + } else if (r <= 37 && (mask & (1 << 4))) { + kind = kind < 4 ? 4 : kind; + } else if (r <= 307 && (mask & (1 << 8))) { kind = kind < 8 ? 8 : kind; - } else if (hasReal10 && r <= 4931) { + } else if (r <= 4931 && (mask & (1 << 10))) { kind = kind < 10 ? 10 : kind; - } else if (hasReal16 && r <= 4931) { + } else if (r <= 4931 && (mask & (1 << 16))) { kind = kind < 16 ? 16 : kind; } else { error -= 2; @@ -790,6 +784,12 @@ CppTypeFor RTDEF(SelectedCharKind)( // SELECTED_INT_KIND CppTypeFor RTDEF(SelectedIntKind)( const char *source, int line, void *x, int xKind) { + return RTNAME(SelectedIntKindMasked)(source, line, x, xKind, + (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8) | (1 << 16)); +} + +CppTypeFor RTDEF(SelectedIntKindMasked)( + const char *source, int line, void *x, int xKind, int mask) { #ifdef __SIZEOF_INT128__ CppTypeFor r = GetIntArgValue>( @@ -798,7 +798,7 @@ CppTypeFor RTDEF(SelectedIntKind)( std::int64_t r = GetIntArgValue( source, line, x, xKind, /*defaultValue*/ 0, /*resKind*/ 8); #endif - return SelectedIntKind(r); + return SelectedIntKind(r, mask); } // SELECTED_LOGICAL_KIND @@ -819,6 +819,14 @@ CppTypeFor RTDEF(SelectedLogicalKind)( CppTypeFor RTDEF(SelectedRealKind)(const char *source, int line, void *precision, int pKind, void *range, int rKind, void *radix, int dKind) { + return RTNAME(SelectedRealKindMasked)(source, line, precision, pKind, range, + rKind, radix, dKind, + (1 << 2) | (1 << 3) | (1 << 4) | (1 << 8) | (1 << 10) | (1 << 16)); +} + +CppTypeFor RTDEF(SelectedRealKindMasked)( + const char *source, int line, void *precision, int pKind, void *range, + int rKind, void *radix, int dKind, int mask) { #ifdef __SIZEOF_INT128__ CppTypeFor p = GetIntArgValue>( @@ -837,7 +845,7 @@ CppTypeFor RTDEF(SelectedRealKind)(const char *source, std::int64_t d = GetIntArgValue( source, line, radix, dKind, /*defaultValue*/ 2, /*resKind*/ 8); #endif - return SelectedRealKind(p, r, d); + return SelectedRealKind(p, r, d, mask); } CppTypeFor RTDEF(Spacing4)( diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90 index a6316ee7c8312..e44f4e62a7148 100644 --- a/flang/test/Driver/mlir-debug-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90 @@ -22,6 +22,9 @@ ! DEBUG-ERR: error: invalid value 'invalid' in '-debug-info-kind=invalid' ! DEBUG-ERR-NOT: Pass statistics report +! ALL: Pass statistics report +! ALL: Fortran::lower::VerifierPass + ! ALL: Pass statistics report ! 
ALL: Fortran::lower::VerifierPass diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 index 2f35f928e99cf..6c2829d3cc5c5 100644 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -9,6 +9,9 @@ end program +! ALL: Pass statistics report +! ALL: Fortran::lower::VerifierPass + ! ALL: Pass statistics report ! ALL: Fortran::lower::VerifierPass diff --git a/flang/test/Driver/mmlir-opts-vs-opts.f90 b/flang/test/Driver/mmlir-opts-vs-opts.f90 new file mode 100644 index 0000000000000..8cd14e02c3fcc --- /dev/null +++ b/flang/test/Driver/mmlir-opts-vs-opts.f90 @@ -0,0 +1,8 @@ +! Verify that mlir pass options are only accessible under `-mmlir`. + +!RUN: %flang_fc1 -emit-hlfir -mmlir -mlir-pass-statistics %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=MMLIR +!RUN: not %flang_fc1 -emit-hlfir -mlir-pass-statistics %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=NOMMLIR + +!MMLIR: Pass statistics report +!NOMMLIR: error: unknown argument: '-mlir-pass-statistics' +end diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir index 06e08d14b2435..e9aeaa281e2a8 100644 --- a/flang/test/Fir/cuf-invalid.fir +++ b/flang/test/Fir/cuf-invalid.fir @@ -94,3 +94,34 @@ func.func @_QPsub1() { cuf.free %0 : !fir.ref {data_attr = #cuf.cuda} return } + +// ----- + +func.func @_QPsub1(%arg0: !fir.ref> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "adev"}, %arg1: !fir.ref> {fir.bindc_name = "ahost"}, %arg2: !fir.ref {fir.bindc_name = "n"}, %arg3: !fir.ref {fir.bindc_name = "m"}) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFsub1En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFsub1Em"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.load %2#0 : !fir.ref + %5 = arith.muli %3, %4 : i32 + %6 = fir.convert %5 : (i32) -> i64 + %7 = fir.convert %6 : (i64) -> index + %c0 = arith.constant 0 : index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11:2 = hlfir.declare %arg0(%10) dummy_scope %0 {data_attr = #cuf.cuda, uniq_name = "_QFsub1Eadev"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %12 = fir.load %1#0 : !fir.ref + %13 = fir.load %2#0 : !fir.ref + %14 = arith.muli %12, %13 : i32 + %15 = fir.convert %14 : (i32) -> i64 + %16 = fir.convert %15 : (i64) -> index + %c0_0 = arith.constant 0 : index + %17 = arith.cmpi sgt, %16, %c0_0 : index + %18 = arith.select %17, %16, %c0_0 : index + %19 = fir.shape %18 : (index) -> !fir.shape<1> + %20:2 = hlfir.declare %arg1(%19) dummy_scope %0 {uniq_name = "_QFsub1Eahost"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + // expected-error@+1{{'cuf.data_transfer' op shape can only be specified on data transfer with references}} + cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer} : !fir.box>, !fir.box> + return +} diff --git a/flang/test/Integration/debug-cyclic-derived-type.f90 b/flang/test/Integration/debug-cyclic-derived-type.f90 new file mode 100644 index 0000000000000..03e06336a6e08 --- /dev/null +++ b/flang/test/Integration/debug-cyclic-derived-type.f90 @@ -0,0 +1,15 @@ +! 
RUN: %flang_fc1 -emit-llvm -debug-info-kind=standalone %s -o - | FileCheck %s + +module m + type t1 + type(t2), pointer :: p + end type + type t2 + type(t1) :: v1 + end type + type(t1) :: v2 + type(t2) :: v3 +end module + +! CHECK-DAG: !DICompositeType(tag: DW_TAG_structure_type, name: "t1"{{.*}}) +! CHECK-DAG: !DICompositeType(tag: DW_TAG_structure_type, name: "t2"{{.*}}) diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 index 833976ff284a8..5f09371bbaba2 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 @@ -57,6 +57,5 @@ end program compilation_to_obj ! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global ! LLVM: define internal void @_QQmain..omp_par -! LLVM: %[[LOCAL_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 -! LLVM-NEXT: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 -! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %[[LOCAL_VAR]], align 8 +! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 +! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8 diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 index 1457be05ca102..262075ec9b25d 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -26,10 +26,10 @@ end subroutine proc !CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]] !CHECK: %[[F_priv:.*]] = alloca ptr !CHECK: %[[I_priv:.*]] = alloca i32 -!CHECK: store ptr %{{.*}}, ptr %[[F_priv]] !CHECK: omp.reduction.init: -!CHECK: store i32 0, ptr %[[I_priv]] +!CHECK: store ptr %{{.*}}, ptr %[[F_priv]] +!CHECK: store i32 0, ptr %[[I_priv]] !CHECK: omp.par.region: !CHECK: br label %[[MALLOC_BB:.*]] diff --git a/flang/test/Parser/recovery05.f90 b/flang/test/Parser/recovery05.f90 new file mode 100644 index 0000000000000..9c8c3689b27bd --- /dev/null +++ b/flang/test/Parser/recovery05.f90 @@ -0,0 +1,5 @@ +! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s +continue +! CHECK: error: expected end of statement +flush iostat=1 +end diff --git a/flang/test/Preprocessing/line-in-contin.F90 b/flang/test/Preprocessing/line-in-contin.F90 index 138e579bffaa2..28efbd02d3ae8 100644 --- a/flang/test/Preprocessing/line-in-contin.F90 +++ b/flang/test/Preprocessing/line-in-contin.F90 @@ -1,8 +1,10 @@ -! RUN: %flang_fc1 -E %s 2>&1 | FileCheck %s -! CHECK: call foo( 0.) -! CHECK: call foo( 1.) -! CHECK: call foo( 2.) -! CHECK: call foo( 3.) +! RUN: %flang_fc1 -fopenmp -E %s 2>&1 | FileCheck %s +! CHECK: call foo(0.) +! CHECK: call foo(1.) +! CHECK: call foo(2.) +! CHECK: call foo(3.) +! CHECK: !$omp parallel do default(none) private(j) +! CHECK: !$omp end parallel do call foo( & # 100 "bar.h" & 0.) @@ -17,4 +19,16 @@ # 103 "bar.h" & 3. 
& ) +!$omp parallel do & +#ifdef undef +!$omp garbage & +#else +!$omp default(none) & +#endif +!$omp private(j) + do j=1,100 + end do +!$omp end & +# 104 "bar.h" +!$omp parallel do end diff --git a/flang/test/Semantics/declarations03.f90 b/flang/test/Semantics/declarations03.f90 index 65b07e7d5c656..8e6f0a4aaf6bd 100644 --- a/flang/test/Semantics/declarations03.f90 +++ b/flang/test/Semantics/declarations03.f90 @@ -50,6 +50,9 @@ module m !ERROR: BIND_C attribute was already specified on 's5' integer, bind(c, name="ss2") :: s5 + integer, bind(c, name="s6explicit") :: s6 + dimension s6(10) ! caused spurious error + end subroutine common1() diff --git a/flang/test/Semantics/resolve82.f90 b/flang/test/Semantics/resolve82.f90 index 88339742efdb3..989ce1d837c70 100644 --- a/flang/test/Semantics/resolve82.f90 +++ b/flang/test/Semantics/resolve82.f90 @@ -34,6 +34,7 @@ end function procFunc real y common /blk/ y protected y + logical,protected,external,pointer :: z contains @@ -60,3 +61,8 @@ subroutine testProcDecl(arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) end subroutine testProcDecl end module m + +subroutine subb() + !Ensure no spurious error from a benign UseError + use m, testProcDecl=>z +end diff --git a/flang/test/Semantics/undef-result01.f90 b/flang/test/Semantics/undef-result01.f90 index bf6af11a8d7b9..08e7fe1e44899 100644 --- a/flang/test/Semantics/undef-result01.f90 +++ b/flang/test/Semantics/undef-result01.f90 @@ -148,3 +148,8 @@ function defdByAssociate() s = 1. end associate end + +function defdByElementArgToImplicit() result(r) + real r(1) + call define(r(1)) +end diff --git a/flang/test/Transforms/debug-derived-type-1.fir b/flang/test/Transforms/debug-derived-type-1.fir new file mode 100644 index 0000000000000..e453db6ae6fbb --- /dev/null +++ b/flang/test/Transforms/debug-derived-type-1.fir @@ -0,0 +1,73 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +// Only enabled on x86_64 +// REQUIRES: x86-registered-target + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>, #dlti.dl_entry<"dlti.endianness", "little">>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.version = #omp.version} { + fir.global @_QMm_employeeEemployee : !fir.type<_QMm_employeeTt_employee{t_person:!fir.type<_QMm_employeeTt_person{t_address:!fir.type<_QMm_employeeTt_address{house_number:i32}>,name:!fir.char<1,20>}>,hired_date:!fir.type<_QMm_employeeTt_date{year:i32,month:i32,day:i32}>,monthly_salary:f32}> { + %0 = fir.zero_bits !fir.type<_QMm_employeeTt_employee{t_person:!fir.type<_QMm_employeeTt_person{t_address:!fir.type<_QMm_employeeTt_address{house_number:i32}>,name:!fir.char<1,20>}>,hired_date:!fir.type<_QMm_employeeTt_date{year:i32,month:i32,day:i32}>,monthly_salary:f32}> + fir.has_value %0 : 
!fir.type<_QMm_employeeTt_employee{t_person:!fir.type<_QMm_employeeTt_person{t_address:!fir.type<_QMm_employeeTt_address{house_number:i32}>,name:!fir.char<1,20>}>,hired_date:!fir.type<_QMm_employeeTt_date{year:i32,month:i32,day:i32}>,monthly_salary:f32}> + } loc(#loc5) + fir.global @_QMt1Evar : !fir.type<_QMt1Tt_t1{age:i32,points:!fir.array<3x!fir.complex<4>>,cond:!fir.logical<1>,name:!fir.char<1,20>,ratio:f64}> { + %0 = fir.zero_bits !fir.type<_QMt1Tt_t1{age:i32,points:!fir.array<3x!fir.complex<4>>,cond:!fir.logical<1>,name:!fir.char<1,20>,ratio:f64}> + fir.has_value %0 : !fir.type<_QMt1Tt_t1{age:i32,points:!fir.array<3x!fir.complex<4>>,cond:!fir.logical<1>,name:!fir.char<1,20>,ratio:f64}> + } loc(#loc6) + fir.type_info @_QMt1Tt_t1 noinit nodestroy nofinal : !fir.type<_QMt1Tt_t1{age:i32,points:!fir.array<3x!fir.complex<4>>,cond:!fir.logical<1>,name:!fir.char<1,20>,ratio:f64}> loc(#loc7) + fir.type_info @_QMm_employeeTt_address noinit nodestroy nofinal : !fir.type<_QMm_employeeTt_address{house_number:i32}> loc(#loc1) + fir.type_info @_QMm_employeeTt_person noinit nodestroy nofinal extends !fir.type<_QMm_employeeTt_address{house_number:i32}> : !fir.type<_QMm_employeeTt_person{t_address:!fir.type<_QMm_employeeTt_address{house_number:i32}>,name:!fir.char<1,20>}> loc(#loc2) + fir.type_info @_QMm_employeeTt_date noinit nodestroy nofinal : !fir.type<_QMm_employeeTt_date{year:i32,month:i32,day:i32}> loc(#loc3) + fir.type_info @_QMm_employeeTt_employee noinit nodestroy nofinal extends !fir.type<_QMm_employeeTt_person{t_address:!fir.type<_QMm_employeeTt_address{house_number:i32}>,name:!fir.char<1,20>}> : !fir.type<_QMm_employeeTt_employee{t_person:!fir.type<_QMm_employeeTt_person{t_address:!fir.type<_QMm_employeeTt_address{house_number:i32}>,name:!fir.char<1,20>}>,hired_date:!fir.type<_QMm_employeeTt_date{year:i32,month:i32,day:i32}>,monthly_salary:f32}> loc(#loc4) + fir.type_info @_QFTt_pair noinit nodestroy nofinal : !fir.type<_QFTt_pair{i:i64,x:f64}> loc(#loc8) + func.func @_QQmain() attributes {fir.bindc_name = "test"} { + %1 = fir.alloca !fir.type<_QFTt_pair{i:i64,x:f64}> {bindc_name = "pair", uniq_name = "_QFEpair"} + %2 = fircg.ext_declare %1 {uniq_name = "_QFEpair"} : (!fir.ref>) -> !fir.ref> loc(#loc9) + return + } loc(#loc10) +} +#loc1 = loc("derived1.f90":24:1) +#loc2 = loc("derived1.f90":35:25) +#loc3 = loc("derived1.f90":17:1) +#loc4 = loc("derived1.f90":46:1) +#loc5 = loc("derived1.f90":50:3) +#loc6 = loc("derived1.f90":62:3) +#loc7 = loc("derived1.f90":70:3) +#loc8 = loc("derived1.f90":85:3) +#loc9 = loc("derived1.f90":77:3) +#loc10 = loc("derived1.f90":75:3) + + +// CHECK-DAG: #[[INT_TY:.*]] = #llvm.di_basic_type +// CHECK-DAG: #[[INT8_TY:.*]] = #llvm.di_basic_type +// CHECK-DAG: #[[REAL4_TY:.*]] = #llvm.di_basic_type +// CHECK-DAG: #[[CMX8_TY:.*]] = #llvm.di_basic_type +// CHECK-DAG: #[[CMX_ARR:.*]] = #llvm.di_composite_type +// CHECK-DAG: #[[LOG_TY:.*]] = #llvm.di_basic_type +// CHECK-DAG: #[[REAL8_TY:.*]] = #llvm.di_basic_type +// CHECK-DAG: #[[STR_TY:.*]] = #llvm.di_string_type +// CHECK-DAG: #[[MOD:.*]] = #llvm.di_module<{{.*}}name = "m_employee"{{.*}}> +// CHECK-DAG: #[[MOD1:.*]] = #llvm.di_module<{{.*}}name = "t1"{{.*}}> +// CHECK-DAG: #[[ELMA1:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ADDR:.*]] = #llvm.di_composite_type +// CHECK-DAG: #[[ELMD1:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELMD2:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELMD3:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[DATE:.*]] = #llvm.di_composite_type +// CHECK-DAG: #[[ELMP1:.*]] = 
#llvm.di_derived_type +// CHECK-DAG: #[[ELMP2:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[PERS:.*]] = #llvm.di_composite_type +// CHECK-DAG: #[[ELME1:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELME2:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELME3:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[EMP:.*]] = #llvm.di_composite_type + +// CHECK-DAG: #[[ELM1:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELM2:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELM3:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELM4:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELM5:.*]] = #llvm.di_derived_type +// CHECK-DAG: #llvm.di_composite_type + +// CHECK-DAG: #[[SP:.*]] = #llvm.di_subprogram +// CHECK-DAG: #[[ELML1:.*]] = #llvm.di_derived_type +// CHECK-DAG: #[[ELML2:.*]] = #llvm.di_derived_type +// CHECK-DAG: #llvm.di_composite_type diff --git a/libc/test/src/stdio/vfscanf_test.cpp b/libc/test/src/stdio/vfscanf_test.cpp index 7a9cbf7f12388..fa4e27582375f 100644 --- a/libc/test/src/stdio/vfscanf_test.cpp +++ b/libc/test/src/stdio/vfscanf_test.cpp @@ -43,8 +43,8 @@ static int call_vfscanf(::FILE *stream, const char *__restrict format, ...) { return ret; } -TEST(LlvmLibcFScanfTest, WriteToFile) { - const char *FILENAME = "fscanf_output.test"; +TEST(LlvmLibcVFScanfTest, WriteToFile) { + const char *FILENAME = "vfscanf_output.test"; auto FILE_PATH = libc_make_test_file_path(FILENAME); ::FILE *file = scanf_test::fopen(FILE_PATH, "w"); ASSERT_FALSE(file == nullptr); diff --git a/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake b/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake index 4a9389fdcb41c..c0f2bad1c95af 100644 --- a/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake +++ b/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake @@ -1,4 +1,2 @@ set(LIBCXX_HARDENING_MODE "fast" CACHE STRING "") -set(LIBCXX_ABI_DEFINES "_LIBCPP_ABI_BOUNDED_ITERATORS" CACHE STRING "") -set(LIBCXX_ABI_DEFINES "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING" CACHE STRING "") -set(LIBCXX_ABI_DEFINES "_LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR" CACHE STRING "") +set(LIBCXX_ABI_DEFINES "_LIBCPP_ABI_BOUNDED_ITERATORS;_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING;_LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR" CACHE STRING "") diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index b14d2cb6c7803..b8e3d05588f96 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -620,8 +620,7 @@ inline void __forward_list_base<_Tp, _Alloc>::swap(__forward_list_base& __x) _NOEXCEPT_(!__node_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__node_allocator>) #endif { - std::__swap_allocator( - __alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(__alloc(), __x.__alloc()); using std::swap; swap(__before_begin()->__next_, __x.__before_begin()->__next_); } diff --git a/libcxx/include/string b/libcxx/include/string index 6e93a6230cc2c..05d42afb7c9c3 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -827,8 +827,8 @@ public: // Users might provide custom allocators, and prior to C++20 we have no existing way to detect whether the allocator's // pointer type is contiguous (though it has to be by the Standard). Using the wrapper type ensures the iterator is // considered contiguous. 
- typedef __bounded_iter<__wrap_iter> iterator; - typedef __bounded_iter<__wrap_iter> const_iterator; + typedef __bounded_iter<__wrap_iter > iterator; + typedef __bounded_iter<__wrap_iter > const_iterator; #else typedef __wrap_iter iterator; typedef __wrap_iter const_iterator; @@ -1213,7 +1213,7 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator __self_view() const _NOEXCEPT { - return __self_view(data(), size()); + return __self_view(typename __self_view::__assume_valid(), data(), size()); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS basic_string& @@ -1822,7 +1822,7 @@ public: #if _LIBCPP_STD_VER >= 20 constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(__self_view __sv) const noexcept { - return __self_view(data(), size()).starts_with(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).starts_with(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept { @@ -1834,7 +1834,7 @@ public: } constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(__self_view __sv) const noexcept { - return __self_view(data(), size()).ends_with(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).ends_with(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept { @@ -1848,15 +1848,15 @@ public: #if _LIBCPP_STD_VER >= 23 constexpr _LIBCPP_HIDE_FROM_ABI bool contains(__self_view __sv) const noexcept { - return __self_view(data(), size()).contains(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { - return __self_view(data(), size()).contains(__c); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__c); } constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* __s) const { - return __self_view(data(), size()).contains(__s); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__s); } #endif diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 2a03ee99e9ab5..cf97e3a9be314 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -211,6 +211,7 @@ namespace std { #include <__functional/hash.h> #include <__functional/unary_function.h> #include <__fwd/ostream.h> +#include <__fwd/string.h> #include <__fwd/string_view.h> #include <__iterator/bounded_iter.h> #include <__iterator/concepts.h> @@ -689,6 +690,9 @@ private: const value_type* __data_; size_type __size_; + + template + friend class basic_string; }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(basic_string_view); diff --git a/libcxx/include/vector b/libcxx/include/vector index 3aa23d8fc1e24..81aab9407714c 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -404,8 +404,8 @@ public: // Users might provide custom allocators, and prior to C++20 we have no existing way to detect whether the allocator's // pointer type is contiguous (though it has to be by the Standard). Using the wrapper type ensures the iterator is // considered contiguous. 
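The `__self_view(typename __self_view::__assume_valid(), data(), size())` calls in the basic_string hunk above construct the internal string_view through a tag type, so the view can take the string's own pointer/size pair on trust instead of re-running whatever checks its public constructor performs; the `friend class basic_string` added to string_view above presumably exists so that trusted path does not have to be public. A generic sketch of the tag-dispatch pattern, with illustrative names that are not libc++'s internals:

#include <cassert>
#include <cstddef>

class view {
public:
  // Tag for callers that already guarantee the (pointer, size) pair is valid.
  struct assume_valid {};

  view(const char *data, std::size_t size) : data_(data), size_(size) {
    assert(data != nullptr || size == 0); // public constructor validates
  }
  view(assume_valid, const char *data, std::size_t size)
      : data_(data), size_(size) {} // trusted path: skip the checks

private:
  const char *data_;
  std::size_t size_;
};

// An owning string type can then hand out views of its own buffer as
// view(view::assume_valid(), buf, len), which is the shape of the calls above.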
- typedef __bounded_iter<__wrap_iter> iterator; - typedef __bounded_iter<__wrap_iter> const_iterator; + typedef __bounded_iter<__wrap_iter > iterator; + typedef __bounded_iter<__wrap_iter > const_iterator; #else typedef __wrap_iter iterator; typedef __wrap_iter const_iterator; @@ -1821,8 +1821,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::swap(vector& __x) std::swap(this->__begin_, __x.__begin_); std::swap(this->__end_, __x.__end_); std::swap(this->__end_cap(), __x.__end_cap()); - std::__swap_allocator( - this->__alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(this->__alloc(), __x.__alloc()); } template @@ -2820,8 +2819,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector::swap(vector& __x) std::swap(this->__begin_, __x.__begin_); std::swap(this->__size_, __x.__size_); std::swap(this->__cap(), __x.__cap()); - std::__swap_allocator( - this->__alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(this->__alloc(), __x.__alloc()); } template diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp index 2dc7f5c765419..db17221e515d3 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include "test_macros.h" diff --git a/libcxx/test/support/fp_compare.h b/libcxx/test/support/fp_compare.h index 3088a211dadc3..237ee50d1f66b 100644 --- a/libcxx/test/support/fp_compare.h +++ b/libcxx/test/support/fp_compare.h @@ -9,16 +9,17 @@ #ifndef SUPPORT_FP_COMPARE_H #define SUPPORT_FP_COMPARE_H -#include // for std::abs #include // for std::max #include -#include <__config> +#include // for std::abs + +#include "test_macros.h" // See https://www.boost.org/doc/libs/1_70_0/libs/test/doc/html/boost_test/testing_tools/extended_comparison/floating_point/floating_points_comparison_theory.html template bool fptest_close(T val, T expected, T eps) { - _LIBCPP_CONSTEXPR T zero = T(0); + TEST_CONSTEXPR T zero = T(0); assert(eps >= zero); // Handle the zero cases diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 6353c87c3d865..1c7085d90f0af 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -417,7 +417,7 @@ generic-hardening-mode-fast-with-abi-breaks) clean generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake" check-runtimes - check-abi-list + # Not checking ABI list since we purposefully enable ABI breaking changes ;; generic-hardening-mode-extensive) clean diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index f9a22b0a7a5f0..3ecd4d241f2cd 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -61,7 +61,7 @@ static StringRef getBasename(StringRef path) { std::string lld::toString(const coff::InputFile *file) { if (!file) return ""; - if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind) + if (file->parentName.empty()) return std::string(file->getName()); return (getBasename(file->parentName) + "(" + getBasename(file->getName()) + diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 1dfff0a90f4ae..a5f155bc05bc9 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -494,20 +494,8 @@ void SymbolTable::resolveRemainingUndefines() { StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. 
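The forward_list and vector hunks above drop the explicit `integral_constant` argument to `std::__swap_allocator`, leaving the two-argument form to decide on its own whether the allocators are exchanged. A generic sketch of that decision, dispatching on `propagate_on_container_swap`; this is illustrative only, not the libc++ implementation:

#include <memory>
#include <utility>

template <class Alloc>
void swap_allocators_if_needed(Alloc &a, Alloc &b) {
  if constexpr (std::allocator_traits<Alloc>::propagate_on_container_swap::value) {
    using std::swap;
    swap(a, b); // the allocator type asks to travel with its container
  } else {
    // Otherwise the allocators stay put; swapping the containers' internal
    // pointers is enough (and the allocators must compare equal for the
    // container swap to be valid in the first place).
    (void)a;
    (void)b;
  }
}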
- if (Defined *d = undef->getWeakAlias()) { - // We want to replace Sym with D. However, we can't just blindly - // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an - // internal symbol, and internal symbols are stored as "unparented" - // Symbols. For that reason we need to check which type of symbol we - // are dealing with and copy the correct number of bytes. - if (isa(d)) - memcpy(sym, d, sizeof(DefinedRegular)); - else if (isa(d)) - memcpy(sym, d, sizeof(DefinedAbsolute)); - else - memcpy(sym, d, sizeof(SymbolUnion)); + if (undef->resolveWeakAlias()) continue; - } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index ff8ad1e619116..b098abb80d6f1 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -136,6 +136,29 @@ Defined *Undefined::getWeakAlias() { return nullptr; } +bool Undefined::resolveWeakAlias() { + Defined *d = getWeakAlias(); + if (!d) + return false; + + // We want to replace Sym with D. However, we can't just blindly + // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an + // internal symbol, and internal symbols are stored as "unparented" + // Symbols. For that reason we need to check which type of symbol we + // are dealing with and copy the correct number of bytes. + StringRef name = getName(); + if (isa(d)) + memcpy(this, d, sizeof(DefinedRegular)); + else if (isa(d)) + memcpy(this, d, sizeof(DefinedAbsolute)); + else + memcpy(this, d, sizeof(SymbolUnion)); + + nameData = name.data(); + nameSize = name.size(); + return true; +} + MemoryBufferRef LazyArchive::getMemberBuffer() { Archive::Child c = CHECK(sym.getMember(), "could not get the member for symbol " + diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 56b137d56873a..c427a062dc82b 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -341,6 +341,9 @@ class Undefined : public Symbol { // symbol by searching the chain of fallback symbols. Returns the symbol if // successful, otherwise returns null. Defined *getWeakAlias(); + + // If this symbol is external weak, replace this object with aliased symbol. + bool resolveWeakAlias(); }; // Windows-specific classes. diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 0360e186ecf0c..35e0f98926ee8 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -2062,6 +2062,24 @@ void Writer::createECChunks() { if (auto thunk = dyn_cast(sym->getChunk())) { hexpthkSec->addChunk(thunk); exportThunks.push_back({thunk, thunk->target}); + } else if (auto def = dyn_cast(sym)) { + // Allow section chunk to be treated as an export thunk if it looks like + // one. + SectionChunk *chunk = def->getChunk(); + if (!chunk->live || chunk->getMachine() != AMD64) + continue; + assert(sym->getName().starts_with("EXP+")); + StringRef targetName = sym->getName().substr(strlen("EXP+")); + // If EXP+#foo is an export thunk of a hybrid patchable function, + // we should use the #foo$hp_target symbol as the redirection target. + // First, try to look up the $hp_target symbol. If it can't be found, + // assume it's a regular function and look for #foo instead. 
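The comment above relies on a naming scheme worth spelling out: for an ARM64EC hybrid-patchable export `#foo`, the custom export thunk is named `EXP+#foo` and the patchable body is `#foo$hp_target` (the `hp-func.s` input further below sets up exactly these symbols). A tiny illustrative helper for the lookup order, not lld's code; the real lookup follows in the next hunk lines:

#include <string>
#include <vector>

// Given an export thunk symbol "EXP+<name>", return candidate redirection
// targets in the order the rewriter tries them: "<name>$hp_target" first,
// then plain "<name>".
std::vector<std::string> candidateThunkTargets(const std::string &thunkSym) {
  const std::string prefix = "EXP+";
  std::string name = thunkSym.substr(prefix.size());
  return {name + "$hp_target", name};
}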
+ Symbol *targetSym = ctx.symtab.find((targetName + "$hp_target").str()); + if (!targetSym) + targetSym = ctx.symtab.find(targetName); + Defined *t = dyn_cast_or_null(targetSym); + if (t && isArm64EC(t->getChunk()->getMachine())) + exportThunks.push_back({chunk, t}); } } diff --git a/lld/test/COFF/arm64ec-cust-export-thunk.s b/lld/test/COFF/arm64ec-cust-export-thunk.s new file mode 100644 index 0000000000000..e56f34c875217 --- /dev/null +++ b/lld/test/COFF/arm64ec-cust-export-thunk.s @@ -0,0 +1,82 @@ +# REQUIRES: aarch64, x86 +# RUN: split-file %s %t.dir && cd %t.dir + +# Test that metadata is generated when a custom export thunk is supplied. + +# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func.s -o func.obj +# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows hp-func.s -o hp-func.obj +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows thunk.s -o thunk.obj +# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj + +# RUN: lld-link -out:out.dll -machine:arm64ec func.obj thunk.obj loadconfig-arm64ec.obj -dll -noentry "-export:#func,EXPORTAS,func" + +# RUN: llvm-objdump -d out.dll | FileCheck --check-prefixes=DISASM,DISASM-EXP %s +# DISASM: Disassembly of section .text: +# DISASM-EMPTY: +# DISASM-NEXT: 0000000180001000 <.text>: +# DISASM-NEXT: 180001000: 52800040 mov w0, #0x2 // =2 +# DISASM-NEXT: 180001004: d65f03c0 ret +# DISASM-NEXT: ... +# DISASM-EXP-EMPTY: +# DISASM-EXP-NEXT: 0000000180002000 : +# DISASM-NEXT: 180002000: b8 03 00 00 00 movl $0x3, %eax +# DISASM-NEXT: 180002005: c3 retq + +# RUN: llvm-objdump -p out.dll | FileCheck --check-prefix=EXPORT %s +# EXPORT: Ordinal RVA Name +# EXPORT-NEXT: 1 0x2000 func + +# RUN: llvm-readobj --coff-load-config out.dll | FileCheck --check-prefix=CHPE %s +# CHPE: CodeMap [ +# CHPE-NEXT: 0x1000 - 0x1008 ARM64EC +# CHPE-NEXT: 0x2000 - 0x2006 X64 +# CHPE-NEXT: ] +# CHPE-NEXT: CodeRangesToEntryPoints [ +# CHPE-NEXT: 0x2000 - 0x2006 -> 0x2000 +# CHPE-NEXT: ] +# CHPE-NEXT: RedirectionMetadata [ +# CHPE-NEXT: 0x2000 -> 0x1000 +# CHPE-NEXT: ] + +# RUN: lld-link -out:out2.dll -machine:arm64ec hp-func.obj thunk.obj loadconfig-arm64ec.obj -dll -noentry +# RUN: llvm-objdump -d out2.dll | FileCheck --check-prefix=DISASM %s +# RUN: llvm-readobj --coff-load-config out2.dll | FileCheck --check-prefix=CHPE %s + +#--- func.s + .globl "#func" + .p2align 2, 0x0 +"#func": + mov w0, #2 + ret + +#--- hp-func.s + .section .text,"xr",discard,"#func$hp_target" + .globl "#func$hp_target" + .p2align 2, 0x0 +"#func$hp_target": + mov w0, #2 + ret + + .def "EXP+#func" + .scl 2 + .type 32 + .endef + .weak func +.set func, "EXP+#func" + .weak "#func" +.set "#func", "#func$hp_target" + + .data + .rva func + +#--- thunk.s + .def "EXP+#func" + .scl 2 + .type 32 + .endef + .section .wowthk$aa,"xr",discard,"EXP+#func" + .globl "EXP+#func" + .p2align 2, 0x0 +"EXP+#func": + movl $3, %eax + retq diff --git a/lld/test/COFF/delayimports-error.test b/lld/test/COFF/delayimports-error.test index cced9fb65e614..5f45083cf5e61 100644 --- a/lld/test/COFF/delayimports-error.test +++ b/lld/test/COFF/delayimports-error.test @@ -8,7 +8,7 @@ # RUN: /alternatename:__delayLoadHelper2=main /opt:noref >& %t.log # RUN: FileCheck %s < %t.log -# CHECK: cannot delay-load foo.dll due to import of data: __declspec(dllimport) datasym +# CHECK: cannot delay-load foo.lib(foo.dll) due to import of data: __declspec(dllimport) datasym --- !COFF header: diff --git a/lld/test/COFF/duplicate.test b/lld/test/COFF/duplicate.test index 
76c88b070ff37..11e5aa06318fe 100644 --- a/lld/test/COFF/duplicate.test +++ b/lld/test/COFF/duplicate.test @@ -13,5 +13,5 @@ RUN: not lld-link /out:gamma.exe /subsystem:console /entry:mainCRTStartup gamma. CHECK-GAMMA: error: duplicate symbol: __declspec(dllimport) f CHECK-GAMMA: defined at {{.*}}gamma.obj -CHECK-GAMMA: defined at alpha.dll +CHECK-GAMMA: defined at alpha.lib(alpha.dll) diff --git a/lld/test/COFF/implib-machine.s b/lld/test/COFF/implib-machine.s index 32deff0fc25f8..92f01bbc72799 100644 --- a/lld/test/COFF/implib-machine.s +++ b/lld/test/COFF/implib-machine.s @@ -6,10 +6,10 @@ # RUN: llvm-mc -triple x86_64-windows-msvc %t.dir/test.s -filetype=obj -o %t.dir/test64.obj # RUN: not lld-link -dll -noentry -out:%t32.dll %t.dir/test32.obj %t.dir/test64.lib 2>&1 | FileCheck --check-prefix=ERR32 %s -# ERR32: error: test.dll: machine type x64 conflicts with x86 +# ERR32: error: test64.lib(test.dll): machine type x64 conflicts with x86 # RUN: not lld-link -dll -noentry -out:%t64.dll %t.dir/test64.obj %t.dir/test32.lib 2>&1 | FileCheck --check-prefix=ERR64 %s -# ERR64: error: test.dll: machine type x86 conflicts with x64 +# ERR64: error: test32.lib(test.dll): machine type x86 conflicts with x64 #--- test.s .def @feat.00; diff --git a/lld/test/COFF/symtab.test b/lld/test/COFF/symtab.test index 45e8ed39737a4..6ef2b4d47503c 100644 --- a/lld/test/COFF/symtab.test +++ b/lld/test/COFF/symtab.test @@ -86,6 +86,15 @@ # CHECK-NEXT: StorageClass: External (0x2) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: weak_main +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Section: .text (1) +# CHECK-NEXT: BaseType: Null (0x0) +# CHECK-NEXT: ComplexType: Null (0x0) +# CHECK-NEXT: StorageClass: External (0x2) +# CHECK-NEXT: AuxSymbolCount: 0 +# CHECK-NEXT: } # CHECK-NEXT: ] # NO: Symbols [ @@ -237,4 +246,13 @@ symbols: SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_LABEL + - Name: weak_main + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_WEAK_EXTERNAL + WeakExternal: + TagIndex: 10 + Characteristics: IMAGE_WEAK_EXTERN_SEARCH_ALIAS ... 
diff --git a/lldb/include/lldb/Host/Socket.h b/lldb/include/lldb/Host/Socket.h index 573c881f727d8..304a91bdf6741 100644 --- a/lldb/include/lldb/Host/Socket.h +++ b/lldb/include/lldb/Host/Socket.h @@ -19,6 +19,7 @@ #include "lldb/Utility/Status.h" #ifdef _WIN32 +#include "lldb/Host/Pipe.h" #include "lldb/Host/windows/windows.h" #include #include @@ -32,12 +33,35 @@ namespace lldb_private { #if defined(_WIN32) typedef SOCKET NativeSocket; +typedef lldb::pipe_t shared_fd_t; #else typedef int NativeSocket; +typedef NativeSocket shared_fd_t; #endif +class Socket; class TCPSocket; class UDPSocket; +class SharedSocket { +public: + static const shared_fd_t kInvalidFD; + + SharedSocket(const Socket *socket, Status &error); + + shared_fd_t GetSendableFD() { return m_fd; } + + Status CompleteSending(lldb::pid_t child_pid); + + static Status GetNativeSocket(shared_fd_t fd, NativeSocket &socket); + +private: +#ifdef _WIN32 + Pipe m_socket_pipe; + NativeSocket m_socket; +#endif + shared_fd_t m_fd; +}; + class Socket : public IOObject { public: enum SocketProtocol { diff --git a/lldb/include/lldb/Utility/Status.h b/lldb/include/lldb/Utility/Status.h index fa5768141fa45..a80ebe89e562d 100644 --- a/lldb/include/lldb/Utility/Status.h +++ b/lldb/include/lldb/Utility/Status.h @@ -181,11 +181,12 @@ class Status { bool Success() const; protected: - /// Member variables - ValueType m_code = 0; ///< Status code as an integer value. - lldb::ErrorType m_type = - lldb::eErrorTypeInvalid; ///< The type of the above error code. - mutable std::string m_string; ///< A string representation of the error code. + /// Status code as an integer value. + ValueType m_code = 0; + /// The type of the above error code. + lldb::ErrorType m_type = lldb::eErrorTypeInvalid; + /// A string representation of the error code. 
+ mutable std::string m_string; private: explicit Status(const llvm::formatv_object_base &payload) { SetErrorToGenericError(); diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 874383a13e2bb..b095171d8fd1a 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -1099,6 +1099,20 @@ def terminate(self): self.send.close() # self.recv.close() + def request_setInstructionBreakpoints(self, memory_reference=[]): + breakpoints = [] + for i in memory_reference: + args_dict = { + "instructionReference": i, + } + breakpoints.append(args_dict) + args_dict = {"breakpoints": breakpoints} + command_dict = { + "command": "setInstructionBreakpoints", + "type": "request", + "arguments": args_dict, + } + return self.send_recv(command_dict) class DebugAdaptorServer(DebugCommunication): def __init__( diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 86eba355da83d..709b7aff11d7f 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -81,7 +81,10 @@ def verify_breakpoint_hit(self, breakpoint_ids): body = stopped_event["body"] if "reason" not in body: continue - if body["reason"] != "breakpoint": + if ( + body["reason"] != "breakpoint" + and body["reason"] != "instruction breakpoint" + ): continue if "description" not in body: continue diff --git a/lldb/source/Host/common/Socket.cpp b/lldb/source/Host/common/Socket.cpp index 7364a12280cfd..aabd562b0557c 100644 --- a/lldb/source/Host/common/Socket.cpp +++ b/lldb/source/Host/common/Socket.cpp @@ -56,10 +56,12 @@ using namespace lldb_private; typedef const char *set_socket_option_arg_type; typedef char *get_socket_option_arg_type; const NativeSocket Socket::kInvalidSocketValue = INVALID_SOCKET; +const shared_fd_t SharedSocket::kInvalidFD = LLDB_INVALID_PIPE; #else // #if defined(_WIN32) typedef const void *set_socket_option_arg_type; typedef void *get_socket_option_arg_type; const NativeSocket Socket::kInvalidSocketValue = -1; +const shared_fd_t SharedSocket::kInvalidFD = Socket::kInvalidSocketValue; #endif // #if defined(_WIN32) static bool IsInterrupted() { @@ -70,6 +72,80 @@ static bool IsInterrupted() { #endif } +SharedSocket::SharedSocket(const Socket *socket, Status &error) { +#ifdef _WIN32 + m_socket = socket->GetNativeSocket(); + m_fd = kInvalidFD; + + // Create a pipe to transfer WSAPROTOCOL_INFO to the child process. + error = m_socket_pipe.CreateNew(true); + if (error.Fail()) + return; + + m_fd = m_socket_pipe.GetReadPipe(); +#else + m_fd = socket->GetNativeSocket(); + error = Status(); +#endif +} + +Status SharedSocket::CompleteSending(lldb::pid_t child_pid) { +#ifdef _WIN32 + // Transfer WSAPROTOCOL_INFO to the child process. 
+ m_socket_pipe.CloseReadFileDescriptor(); + + WSAPROTOCOL_INFO protocol_info; + if (::WSADuplicateSocket(m_socket, child_pid, &protocol_info) == + SOCKET_ERROR) { + int last_error = ::WSAGetLastError(); + return Status("WSADuplicateSocket() failed, error: %d", last_error); + } + + size_t num_bytes; + Status error = + m_socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info), + std::chrono::seconds(10), num_bytes); + if (error.Fail()) + return error; + if (num_bytes != sizeof(protocol_info)) + return Status("WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes", + num_bytes); +#endif + return Status(); +} + +Status SharedSocket::GetNativeSocket(shared_fd_t fd, NativeSocket &socket) { +#ifdef _WIN32 + socket = Socket::kInvalidSocketValue; + // Read WSAPROTOCOL_INFO from the parent process and create NativeSocket. + WSAPROTOCOL_INFO protocol_info; + { + Pipe socket_pipe(fd, LLDB_INVALID_PIPE); + size_t num_bytes; + Status error = + socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info), + std::chrono::seconds(10), num_bytes); + if (error.Fail()) + return error; + if (num_bytes != sizeof(protocol_info)) { + return Status( + "socket_pipe.ReadWithTimeout(WSAPROTOCOL_INFO) failed: % d bytes", + num_bytes); + } + } + socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, + FROM_PROTOCOL_INFO, &protocol_info, 0, 0); + if (socket == INVALID_SOCKET) { + return Status("WSASocket(FROM_PROTOCOL_INFO) failed: error %d", + ::WSAGetLastError()); + } + return Status(); +#else + socket = fd; + return Status(); +#endif +} + struct SocketScheme { const char *m_scheme; const Socket::SocketProtocol m_protocol; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp index 6064c02c7fd67..6de851081598f 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp @@ -53,7 +53,7 @@ static bool DefaultComputeClangResourceDirectory(FileSpec &lldb_shlib_spec, std::string raw_path = lldb_shlib_spec.GetPath(); llvm::StringRef parent_dir = llvm::sys::path::parent_path(raw_path); static const std::string clang_resource_path = - clang::driver::Driver::GetResourcesPath("bin/lldb", CLANG_RESOURCE_DIR); + clang::driver::Driver::GetResourcesPath("bin/lldb"); static const llvm::StringRef kResourceDirSuffixes[] = { // LLVM.org's build of LLDB uses the clang resource directory placed diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 7e0cf36d0de1b..d887d81912b27 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2737,10 +2737,19 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { if (results.AlreadySearched(this)) return; + auto type_basename = query.GetTypeBasename(); + + Log *log = GetLog(DWARFLog::Lookups); + if (log) { + GetObjectFile()->GetModule()->LogMessage( + log, "SymbolFileDWARF::FindTypes(type_basename=\"{0}\")", + type_basename); + } + std::lock_guard guard(GetModuleMutex()); bool have_index_match = false; - m_index->GetTypes(query.GetTypeBasename(), [&](DWARFDIE die) { + m_index->GetTypes(type_basename, [&](DWARFDIE die) { // Check the language, but only if we have a language filter. 
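Stepping back to the `SharedSocket` helper added in Socket.h/Socket.cpp above: the intended flow is that the parent wraps an accepted socket, hands the sendable fd to the child it spawns (on Windows this is a pipe handle, elsewhere the socket fd itself), and then calls `CompleteSending` so the `WSAPROTOCOL_INFO` travels through the pipe; the child rebuilds a usable socket with `GetNativeSocket`. A rough usage sketch against the declarations above; `LaunchChildWithFD` is a hypothetical stand-in for whatever actually spawns the child and communicates the fd to it:

#include "lldb/Host/Socket.h"

using namespace lldb_private;

// Hypothetical launcher: spawns the child process, passes it `fd` (for
// example on its command line), and returns the child's pid.
lldb::pid_t LaunchChildWithFD(shared_fd_t fd);

Status ShareAcceptedSocket(Socket *conn_socket) {
  Status error;
  SharedSocket shared(conn_socket, error);
  if (error.Fail())
    return error;
  shared_fd_t fd = shared.GetSendableFD();
  lldb::pid_t child = LaunchChildWithFD(fd);
  // No-op on POSIX; on Windows this duplicates the socket into the child via
  // WSADuplicateSocket and writes the WSAPROTOCOL_INFO into the pipe.
  return shared.CompleteSending(child);
}

// In the child, SharedSocket::GetNativeSocket(fd, native_socket) converts the
// inherited value back into a NativeSocket.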
if (query.HasLanguage()) { if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU()))) @@ -2779,8 +2788,14 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { return !results.Done(query); // Keep iterating if we aren't done. }); - if (results.Done(query)) + if (results.Done(query)) { + if (log) { + GetObjectFile()->GetModule()->LogMessage( + log, "SymbolFileDWARF::FindTypes(type_basename=\"{0}\") => {1}", + type_basename, results.GetTypeMap().GetSize()); + } return; + } // With -gsimple-template-names, a templated type's DW_AT_name will not // contain the template parameters. Try again stripping '<' and anything @@ -2795,10 +2810,10 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { // it trims any context items down by removing template parameter names. TypeQuery query_simple(query); if (UpdateCompilerContextForSimpleTemplateNames(query_simple)) { - + auto type_basename_simple = query_simple.GetTypeBasename(); // Copy our match's context and update the basename we are looking for // so we can use this only to compare the context correctly. - m_index->GetTypes(query_simple.GetTypeBasename(), [&](DWARFDIE die) { + m_index->GetTypes(type_basename_simple, [&](DWARFDIE die) { // Check the language, but only if we have a language filter. if (query.HasLanguage()) { if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU()))) @@ -2834,8 +2849,17 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { } return !results.Done(query); // Keep iterating if we aren't done. }); - if (results.Done(query)) + if (results.Done(query)) { + if (log) { + GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::FindTypes(type_basename=\"{0}\") => {1} " + "(simplified as \"{2}\")", + type_basename, results.GetTypeMap().GetSize(), + type_basename_simple); + } return; + } } } @@ -2847,8 +2871,11 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { for (const auto &pair : m_external_type_modules) { if (ModuleSP external_module_sp = pair.second) { external_module_sp->FindTypes(query, results); - if (results.Done(query)) + if (results.Done(query)) { + // We don't log the results here as they are already logged in the + // nested FindTypes call return; + } } } } diff --git a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/Makefile b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/Makefile new file mode 100644 index 0000000000000..697527c4e5522 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main-copy.cpp +CXXFLAGS_EXTRAS := -O0 -g +include Makefile.rules + +main-copy.cpp: main.cpp + cp -f $< $@ diff --git a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py new file mode 100644 index 0000000000000..3862be7bfa34c --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py @@ -0,0 +1,98 @@ +import dap_server +import shutil +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbdap_testcase +import os +import lldb + + +class TestDAP_InstructionBreakpointTestCase(lldbdap_testcase.DAPTestCaseBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + lldbdap_testcase.DAPTestCaseBase.setUp(self) + + self.main_basename = "main-copy.cpp" + self.main_path = 
os.path.realpath(self.getBuildArtifact(self.main_basename)) + + @skipIfWindows + def test_instruction_breakpoint(self): + self.build() + self.instruction_breakpoint_test() + + def instruction_breakpoint_test(self): + """Set an instruction breakpoint from the disassembly view and verify it is hit.""" + # Create a target by the debugger. + target = self.createTestTarget() + + main_line = line_number("main.cpp", "breakpoint 1") + + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + + # Set source breakpoint 1 + response = self.dap_server.request_setBreakpoints(self.main_path, [main_line]) + breakpoints = response["body"]["breakpoints"] + self.assertEqual(len(breakpoints), 1) + breakpoint = breakpoints[0] + self.assertEqual( + breakpoint["line"], main_line, "incorrect breakpoint source line" + ) + self.assertTrue(breakpoint["verified"], "breakpoint is not verified") + self.assertEqual( + self.main_basename, breakpoint["source"]["name"], "incorrect source name" + ) + self.assertEqual( + self.main_path, breakpoint["source"]["path"], "incorrect source file path" + ) + other_breakpoint_id = breakpoint["id"] + + # Continue and then verify the breakpoint + self.dap_server.request_continue() + self.verify_breakpoint_hit([other_breakpoint_id]) + + # Check the stack trace, verify the source info, and record the current instruction pointer reference + frames = self.dap_server.request_stackTrace()["body"]["stackFrames"] + instructionPointerReference = [] + setInstructionBreakpoints = [] + instructionPointerReference.append(frames[0]["instructionPointerReference"]) + self.assertEqual( + frames[0]["source"]["name"], self.main_basename, "incorrect source name" + ) + self.assertEqual( + frames[0]["source"]["path"], self.main_path, "incorrect source file path" + ) + + # Check disassembly view + instruction = self.disassemble(frameIndex=0) + self.assertEqual( + instruction["address"], + instructionPointerReference[0], + "current breakpoint reference is not in the disassembly view", + ) + + # Get next instruction address to set instruction breakpoint + disassembled_instruction_list = self.dap_server.disassembled_instructions + instruction_addr_list = list(disassembled_instruction_list.keys()) + index = instruction_addr_list.index(instructionPointerReference[0]) + if len(instruction_addr_list) > (index + 1): + next_inst_addr = instruction_addr_list[index + 1] + if len(next_inst_addr) > 2: + setInstructionBreakpoints.append(next_inst_addr) + instruction_breakpoint_response = ( + self.dap_server.request_setInstructionBreakpoints( + setInstructionBreakpoints + ) + ) + inst_breakpoints = instruction_breakpoint_response["body"][ + "breakpoints" + ] + self.assertEqual( + inst_breakpoints[0]["instructionReference"], + next_inst_addr, + "Instruction breakpoint was not resolved at the requested address", + ) + self.dap_server.request_continue() + self.verify_breakpoint_hit([inst_breakpoints[0]["id"]]) diff --git a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/main.cpp b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/main.cpp new file mode 100644 index 0000000000000..3c710d6417157 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/main.cpp @@ -0,0 +1,18 @@ +#include +#include + +int function(int x) { + + if (x == 0) // breakpoint 1 + return x; + + if ((x % 2) != 0) + return x; + else + return function(x - 1) + x; +} + +int main(int argc, char const *argv[]) { + int n = function(2); + return n; +} \ No newline at end of file diff --git 
a/lldb/tools/lldb-dap/CMakeLists.txt b/lldb/tools/lldb-dap/CMakeLists.txt index f8f0d86453f58..d68098bf7b326 100644 --- a/lldb/tools/lldb-dap/CMakeLists.txt +++ b/lldb/tools/lldb-dap/CMakeLists.txt @@ -38,6 +38,7 @@ add_lldb_tool(lldb-dap SourceBreakpoint.cpp DAP.cpp Watchpoint.cpp + InstructionBreakpoint.cpp LINK_LIBS liblldb diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index 57b93c28ce930..6012ee52110b7 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -68,7 +68,7 @@ static std::string capitalize(llvm::StringRef str) { void DAP::PopulateExceptionBreakpoints() { llvm::call_once(init_exception_breakpoints_flag, [this]() { - exception_breakpoints = std::vector {}; + exception_breakpoints = std::vector{}; if (lldb::SBDebugger::SupportsLanguage(lldb::eLanguageTypeC_plus_plus)) { exception_breakpoints->emplace_back("cpp_catch", "C++ Catch", @@ -996,4 +996,32 @@ void DAP::SetThreadFormat(llvm::StringRef format) { } } +InstructionBreakpoint * +DAP::GetInstructionBreakpoint(const lldb::break_id_t bp_id) { + for (auto &bp : instruction_breakpoints) { + if (bp.second.id == bp_id) + return &bp.second; + } + return nullptr; +} + +InstructionBreakpoint * +DAP::GetInstructionBPFromStopReason(lldb::SBThread &thread) { + const auto num = thread.GetStopReasonDataCount(); + InstructionBreakpoint *inst_bp = nullptr; + for (size_t i = 0; i < num; i += 2) { + // thread.GetStopReasonDataAtIndex(i) will return the bp ID and + // thread.GetStopReasonDataAtIndex(i+1) will return the location + // within that breakpoint. We only care about the bp ID so we can + // see if this is an instruction breakpoint that is getting hit. + lldb::break_id_t bp_id = thread.GetStopReasonDataAtIndex(i); + inst_bp = GetInstructionBreakpoint(bp_id); + // If any breakpoint is not an instruction breakpoint, then stop and + // report this as a normal breakpoint + if (inst_bp == nullptr) + return nullptr; + } + return inst_bp; +} + } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index 0fc77ac1e8168..f4fdec6e895ad 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -54,6 +54,7 @@ #include "ExceptionBreakpoint.h" #include "FunctionBreakpoint.h" #include "IOStream.h" +#include "InstructionBreakpoint.h" #include "ProgressEvent.h" #include "RunInTerminal.h" #include "SourceBreakpoint.h" @@ -68,6 +69,8 @@ namespace lldb_dap { typedef llvm::DenseMap SourceBreakpointMap; typedef llvm::StringMap FunctionBreakpointMap; +typedef llvm::DenseMap + InstructionBreakpointMap; enum class OutputType { Console, Stdout, Stderr, Telemetry }; @@ -160,6 +163,7 @@ struct DAP { std::unique_ptr log; llvm::StringMap source_breakpoints; FunctionBreakpointMap function_breakpoints; + InstructionBreakpointMap instruction_breakpoints; std::optional> exception_breakpoints; llvm::once_flag init_exception_breakpoints_flag; std::vector pre_init_commands; @@ -334,6 +338,10 @@ struct DAP { void SetThreadFormat(llvm::StringRef format); + InstructionBreakpoint *GetInstructionBreakpoint(const lldb::break_id_t bp_id); + + InstructionBreakpoint *GetInstructionBPFromStopReason(lldb::SBThread &thread); + private: // Send the JSON in "json_str" to the "out" stream. 
Correctly send the // "Content-Length:" field followed by the length, followed by the raw diff --git a/lldb/tools/lldb-dap/DAPForward.h b/lldb/tools/lldb-dap/DAPForward.h index 8c79488fae8db..159d999a63c82 100644 --- a/lldb/tools/lldb-dap/DAPForward.h +++ b/lldb/tools/lldb-dap/DAPForward.h @@ -15,6 +15,7 @@ struct ExceptionBreakpoint; struct FunctionBreakpoint; struct SourceBreakpoint; struct Watchpoint; +struct InstructionBreakpoint; } // namespace lldb_dap namespace lldb { diff --git a/lldb/tools/lldb-dap/InstructionBreakpoint.cpp b/lldb/tools/lldb-dap/InstructionBreakpoint.cpp new file mode 100644 index 0000000000000..de4f6f5d86717 --- /dev/null +++ b/lldb/tools/lldb-dap/InstructionBreakpoint.cpp @@ -0,0 +1,28 @@ +//===-- InstructionBreakpoint.cpp ------------------------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InstructionBreakpoint.h" +#include "DAP.h" + +namespace lldb_dap { + +// Instruction Breakpoint +InstructionBreakpoint::InstructionBreakpoint(const llvm::json::Object &obj) + : Breakpoint(obj), instructionAddressReference(LLDB_INVALID_ADDRESS), id(0), + offset(GetSigned(obj, "offset", 0)) { + GetString(obj, "instructionReference") + .getAsInteger(0, instructionAddressReference); + instructionAddressReference += offset; +} + +void InstructionBreakpoint::SetInstructionBreakpoint() { + bp = g_dap.target.BreakpointCreateByAddress(instructionAddressReference); + id = bp.GetID(); +} +} // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/InstructionBreakpoint.h b/lldb/tools/lldb-dap/InstructionBreakpoint.h new file mode 100644 index 0000000000000..cf1516f46e955 --- /dev/null +++ b/lldb/tools/lldb-dap/InstructionBreakpoint.h @@ -0,0 +1,36 @@ +//===-- InstructionBreakpoint.h --------------------------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_TOOLS_LLDB_DAP_INSTRUCTIONBREAKPOINT_H +#define LLDB_TOOLS_LLDB_DAP_INSTRUCTIONBREAKPOINT_H + +#include "Breakpoint.h" +#include "llvm/ADT/StringRef.h" + +namespace lldb_dap { + +// Instruction Breakpoint +struct InstructionBreakpoint : public Breakpoint { + + lldb::addr_t instructionAddressReference; + int32_t id; + int32_t offset; + + InstructionBreakpoint() + : Breakpoint(), instructionAddressReference(LLDB_INVALID_ADDRESS), id(0), + offset(0) {} + InstructionBreakpoint(const llvm::json::Object &obj); + + // Set instruction breakpoint in LLDB as a new breakpoint + void SetInstructionBreakpoint(); +}; + +} // namespace lldb_dap + +#endif diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index c080fd395b728..7338e7cf41eb0 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -769,6 +769,70 @@ llvm::json::Value CreateStackFrame(lldb::SBFrame &frame) { return llvm::json::Value(std::move(object)); } +// Response to `setInstructionBreakpoints` request. 
+// "Breakpoint": { +// "type": "object", +// "description": "Response to `setInstructionBreakpoints` request.", +// "properties": { +// "id": { +// "type": "number", +// "description": "The identifier for the breakpoint. It is needed if +// breakpoint events are used to update or remove breakpoints." +// }, +// "verified": { +// "type": "boolean", +// "description": "If true, the breakpoint could be set (but not +// necessarily at the desired location." +// }, +// "message": { +// "type": "string", +// "description": "A message about the state of the breakpoint. +// This is shown to the user and can be used to explain why a breakpoint +// could not be verified." +// }, +// "source": { +// "type": "Source", +// "description": "The source where the breakpoint is located." +// }, +// "line": { +// "type": "number", +// "description": "The start line of the actual range covered by the +// breakpoint." +// }, +// "column": { +// "type": "number", +// "description": "The start column of the actual range covered by the +// breakpoint." +// }, +// "endLine": { +// "type": "number", +// "description": "The end line of the actual range covered by the +// breakpoint." +// }, +// "endColumn": { +// "type": "number", +// "description": "The end column of the actual range covered by the +// breakpoint. If no end line is given, then the end column is assumed to +// be in the start line." +// }, +// "instructionReference": { +// "type": "string", +// "description": "A memory reference to where the breakpoint is set." +// }, +// "offset": { +// "type": "number", +// "description": "The offset from the instruction reference. +// This can be negative." +// }, +// }, +// "required": [ "id", "verified", "line"] +// } +llvm::json::Value CreateInstructionBreakpoint(BreakpointBase *ibp) { + llvm::json::Object object; + ibp->CreateJsonObject(object); + return llvm::json::Value(std::move(object)); +} + // "Thread": { // "type": "object", // "description": "A Thread", @@ -893,7 +957,13 @@ llvm::json::Value CreateThreadStopped(lldb::SBThread &thread, body.try_emplace("reason", "exception"); EmplaceSafeString(body, "description", exc_bp->label); } else { - body.try_emplace("reason", "breakpoint"); + InstructionBreakpoint *inst_bp = + g_dap.GetInstructionBPFromStopReason(thread); + if (inst_bp) { + body.try_emplace("reason", "instruction breakpoint"); + } else { + body.try_emplace("reason", "breakpoint"); + } lldb::break_id_t bp_id = thread.GetStopReasonDataAtIndex(0); lldb::break_id_t bp_loc_id = thread.GetStopReasonDataAtIndex(1); std::string desc_str = diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 1515f5ba2e5f4..b6356630b7268 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -322,6 +322,17 @@ llvm::json::Value CreateSource(llvm::StringRef source_path); /// definition outlined by Microsoft. llvm::json::Value CreateStackFrame(lldb::SBFrame &frame); +/// Create a "instruction" object for a LLDB disassemble object as described in +/// the Visual Studio Code debug adaptor definition. +/// +/// \param[in] bp +/// The LLDB instruction object used to populate the disassembly +/// instruction. +/// \return +/// A "Scope" JSON object with that follows the formal JSON +/// definition outlined by Microsoft. +llvm::json::Value CreateInstructionBreakpoint(BreakpointBase *ibp); + /// Create a "Thread" object for a LLDB thread object. 
/// /// This function will fill in the following keys in the returned diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 495ed0256120e..c5c4b09f15622 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -1723,6 +1723,8 @@ void request_initialize(const llvm::json::Object &request) { body.try_emplace("supportsLogPoints", true); // The debug adapter supports data watchpoints. body.try_emplace("supportsDataBreakpoints", true); + // The debug adapter support for instruction breakpoint. + body.try_emplace("supportsInstructionBreakpoints", true); // Put in non-DAP specification lldb specific information. llvm::json::Object lldb_json; @@ -4082,6 +4084,254 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); } +// "SetInstructionBreakpointsRequest" : { +// "allOf" : [ +// {"$ref" : "#/definitions/Request"}, { +// "type" : "object", +// "description" : +// "Replaces all existing instruction breakpoints. Typically, " +// "instruction breakpoints would be set from a disassembly window. " +// "\nTo clear all instruction breakpoints, specify an empty " +// "array.\nWhen an instruction breakpoint is hit, a `stopped` event " +// "(with reason `instruction breakpoint`) is generated.\nClients " +// "should only call this request if the corresponding capability " +// "`supportsInstructionBreakpoints` is true.", +// "properties" : { +// "command" : {"type" : "string", "enum" : +// ["setInstructionBreakpoints"]}, "arguments" : +// {"$ref" : "#/definitions/SetInstructionBreakpointsArguments"} +// }, +// "required" : [ "command", "arguments" ] +// } +// ] +// }, +// "SetInstructionBreakpointsArguments" +// : { +// "type" : "object", +// "description" : "Arguments for `setInstructionBreakpoints` request", +// "properties" : { +// "breakpoints" : { +// "type" : "array", +// "items" : {"$ref" : "#/definitions/InstructionBreakpoint"}, +// "description" : "The instruction references of the breakpoints" +// } +// }, +// "required" : ["breakpoints"] +// }, +// "SetInstructionBreakpointsResponse" +// : { +// "allOf" : [ +// {"$ref" : "#/definitions/Response"}, { +// "type" : "object", +// "description" : "Response to `setInstructionBreakpoints` request", +// "properties" : { +// "body" : { +// "type" : "object", +// "properties" : { +// "breakpoints" : { +// "type" : "array", +// "items" : {"$ref" : "#/definitions/Breakpoint"}, +// "description" : +// "Information about the breakpoints. The array elements +// " "correspond to the elements of the `breakpoints` +// array." +// } +// }, +// "required" : ["breakpoints"] +// } +// }, +// "required" : ["body"] +// } +// ] +// }, +// "InstructionBreakpoint" : { +// "type" : "object", +// "description" : "Properties of a breakpoint passed to the " +// "`setInstructionBreakpoints` request", +// "properties" : { +// "instructionReference" : { +// "type" : "string", +// "description" : +// "The instruction reference of the breakpoint.\nThis should be a " +// "memory or instruction pointer reference from an +// `EvaluateResponse`, " +// "`Variable`, `StackFrame`, `GotoTarget`, or `Breakpoint`." +// }, +// "offset" : { +// "type" : "integer", +// "description" : "The offset from the instruction reference in " +// "bytes.\nThis can be negative." 
+// }, +// "condition" : { +// "type" : "string", +// "description" : "An expression for conditional breakpoints.\nIt is only +// " +// "honored by a debug adapter if the corresponding " +// "capability `supportsConditionalBreakpoints` is true." +// }, +// "hitCondition" : { +// "type" : "string", +// "description" : "An expression that controls how many hits of the " +// "breakpoint are ignored.\nThe debug adapter is expected +// " "to interpret the expression as needed.\nThe +// attribute " "is only honored by a debug adapter if the +// corresponding " "capability +// `supportsHitConditionalBreakpoints` is true." +// }, +// "mode" : { +// "type" : "string", +// "description" : "The mode of this breakpoint. If defined, this must be +// " +// "one of the `breakpointModes` the debug adapter " +// "advertised in its `Capabilities`." +// } +// }, +// "required" : ["instructionReference"] +// }, +// "Breakpoint" +// : { +// "type" : "object", +// "description" : +// "Information about a breakpoint created in `setBreakpoints`, " +// "`setFunctionBreakpoints`, `setInstructionBreakpoints`, or " +// "`setDataBreakpoints` requests.", +// "properties" : { +// "id" : { +// "type" : "integer", +// "description" : +// "The identifier for the breakpoint. It is needed if breakpoint +// " "events are used to update or remove breakpoints." +// }, +// "verified" : { +// "type" : "boolean", +// "description" : "If true, the breakpoint could be set (but not " +// "necessarily at the desired location)." +// }, +// "message" : { +// "type" : "string", +// "description" : "A message about the state of the breakpoint.\nThis +// " +// "is shown to the user and can be used to explain +// why " "a breakpoint could not be verified." +// }, +// "source" : { +// "$ref" : "#/definitions/Source", +// "description" : "The source where the breakpoint is located." +// }, +// "line" : { +// "type" : "integer", +// "description" : +// "The start line of the actual range covered by the breakpoint." +// }, +// "column" : { +// "type" : "integer", +// "description" : +// "Start position of the source range covered by the breakpoint. +// " "It is measured in UTF-16 code units and the client +// capability " +// "`columnsStartAt1` determines whether it is 0- or 1-based." +// }, +// "endLine" : { +// "type" : "integer", +// "description" : +// "The end line of the actual range covered by the breakpoint." +// }, +// "endColumn" : { +// "type" : "integer", +// "description" : +// "End position of the source range covered by the breakpoint. It +// " "is measured in UTF-16 code units and the client capability " +// "`columnsStartAt1` determines whether it is 0- or 1-based.\nIf +// " "no end line is given, then the end column is assumed to be +// in " "the start line." +// }, +// "instructionReference" : { +// "type" : "string", +// "description" : "A memory reference to where the breakpoint is +// set." +// }, +// "offset" : { +// "type" : "integer", +// "description" : "The offset from the instruction reference.\nThis " +// "can be negative." +// }, +// "reason" : { +// "type" : "string", +// "description" : +// "A machine-readable explanation of why a breakpoint may not be +// " "verified. If a breakpoint is verified or a specific reason +// is " "not known, the adapter should omit this property. 
+// Possible " "values include:\n\n- `pending`: Indicates a +// breakpoint might be " "verified in the future, but the adapter +// cannot verify it in the " "current state.\n - `failed`: +// Indicates a breakpoint was not " "able to be verified, and the +// adapter does not believe it can be " "verified without +// intervention.", +// "enum" : [ "pending", "failed" ] +// } +// }, +// "required" : ["verified"] +// }, + +void request_setInstructionBreakpoints(const llvm::json::Object &request) { + llvm::json::Object response; + llvm::json::Array response_breakpoints; + llvm::json::Object body; + FillResponse(request, response); + + auto arguments = request.getObject("arguments"); + auto breakpoints = arguments->getArray("breakpoints"); + + // It holds active instruction breakpoint list received from DAP. + InstructionBreakpointMap request_ibp; + if (breakpoints) { + for (const auto &bp : *breakpoints) { + auto bp_obj = bp.getAsObject(); + if (bp_obj) { + // Read instruction breakpoint request. + InstructionBreakpoint inst_bp(*bp_obj); + // Store them into map for reference. + request_ibp[inst_bp.instructionAddressReference] = std::move(inst_bp); + } + } + + // Iterate previous active instruction breakpoint list. + for (auto &prev_ibp : g_dap.instruction_breakpoints) { + // Find previous instruction breakpoint reference address in newly + // received instruction breakpoint list. + auto inst_reference = request_ibp.find(prev_ibp.first); + // Request for remove and delete the breakpoint, if the prev instruction + // breakpoint ID is not available in active instrcation breakpoint list. + // Means delete removed breakpoint instance. + if (inst_reference == request_ibp.end()) { + g_dap.target.BreakpointDelete(prev_ibp.second.id); + // Update Prev instruction breakpoint list. + g_dap.instruction_breakpoints.erase(prev_ibp.first); + } else { + // Instead of recreating breakpoint instance, update the breakpoint if + // there are any conditional changes. 
+ prev_ibp.second.UpdateBreakpoint(inst_reference->second); + request_ibp.erase(inst_reference); + response_breakpoints.emplace_back( + CreateInstructionBreakpoint(&prev_ibp.second)); + } + } + + for (auto &req_bpi : request_ibp) { + // Add this breakpoint info to the response + g_dap.instruction_breakpoints[req_bpi.first] = std::move(req_bpi.second); + InstructionBreakpoint &new_bp = + g_dap.instruction_breakpoints[req_bpi.first]; + new_bp.SetInstructionBreakpoint(); + response_breakpoints.emplace_back(CreateInstructionBreakpoint(&new_bp)); + } + } + + body.try_emplace("breakpoints", std::move(response_breakpoints)); + response.try_emplace("body", std::move(body)); + g_dap.SendJSON(llvm::json::Value(std::move(response))); +} + void RegisterRequestCallbacks() { g_dap.RegisterRequestCallback("attach", request_attach); g_dap.RegisterRequestCallback("completions", request_completions); @@ -4114,6 +4364,9 @@ void RegisterRequestCallbacks() { g_dap.RegisterRequestCallback("threads", request_threads); g_dap.RegisterRequestCallback("variables", request_variables); g_dap.RegisterRequestCallback("disassemble", request_disassemble); + // Instruction breakpoint request + g_dap.RegisterRequestCallback("setInstructionBreakpoints", + request_setInstructionBreakpoints); // Custom requests g_dap.RegisterRequestCallback("compileUnits", request_compileUnits); g_dap.RegisterRequestCallback("modules", request_modules); diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index 82a3a0d6b4e51..75f51132aa9cc 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -47,108 +47,6 @@ using namespace llvm; // option descriptors for getopt_long_only() -#ifdef _WIN32 -typedef pipe_t shared_fd_t; -const shared_fd_t kInvalidSharedFD = LLDB_INVALID_PIPE; -#else -typedef NativeSocket shared_fd_t; -const shared_fd_t kInvalidSharedFD = Socket::kInvalidSocketValue; -#endif - -class SharedSocket { -public: - SharedSocket(Connection *conn, Status &error) { - m_fd = kInvalidSharedFD; - - const Socket *socket = - static_cast(conn->GetReadObject().get()); - if (socket == nullptr) { - error = Status("invalid conn socket"); - return; - } - -#ifdef _WIN32 - m_socket = socket->GetNativeSocket(); - - // Create a pipe to transfer WSAPROTOCOL_INFO to the child process. - error = m_socket_pipe.CreateNew(true); - if (error.Fail()) - return; - - m_fd = m_socket_pipe.GetReadPipe(); -#else - m_fd = socket->GetNativeSocket(); - error = Status(); -#endif - } - - shared_fd_t GetSendableFD() { return m_fd; } - - Status CompleteSending(lldb::pid_t child_pid) { -#ifdef _WIN32 - // Transfer WSAPROTOCOL_INFO to the child process. - m_socket_pipe.CloseReadFileDescriptor(); - - WSAPROTOCOL_INFO protocol_info; - if (::WSADuplicateSocket(m_socket, child_pid, &protocol_info) == - SOCKET_ERROR) { - int last_error = ::WSAGetLastError(); - return Status("WSADuplicateSocket() failed, error: %d", last_error); - } - - size_t num_bytes; - Status error = - m_socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info), - std::chrono::seconds(10), num_bytes); - if (error.Fail()) - return error; - if (num_bytes != sizeof(protocol_info)) - return Status("WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes", - num_bytes); -#endif - return Status(); - } - - static Status GetNativeSocket(shared_fd_t fd, NativeSocket &socket) { -#ifdef _WIN32 - socket = Socket::kInvalidSocketValue; - // Read WSAPROTOCOL_INFO from the parent process and create NativeSocket. 
- WSAPROTOCOL_INFO protocol_info; - { - Pipe socket_pipe(fd, LLDB_INVALID_PIPE); - size_t num_bytes; - Status error = - socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info), - std::chrono::seconds(10), num_bytes); - if (error.Fail()) - return error; - if (num_bytes != sizeof(protocol_info)) { - return Status( - "socket_pipe.ReadWithTimeout(WSAPROTOCOL_INFO) failed: % d bytes", - num_bytes); - } - } - socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, - FROM_PROTOCOL_INFO, &protocol_info, 0, 0); - if (socket == INVALID_SOCKET) { - return Status("WSASocket(FROM_PROTOCOL_INFO) failed: error %d", - ::WSAGetLastError()); - } - return Status(); -#else - socket = fd; - return Status(); -#endif - } - -private: -#ifdef _WIN32 - Pipe m_socket_pipe; - NativeSocket m_socket; -#endif - shared_fd_t m_fd; -}; - static int g_debug = 0; static int g_verbose = 0; static int g_server = 0; @@ -259,13 +157,13 @@ static void spawn_process_reaped(lldb::pid_t pid, int signal, int status) { gdbserver_portmap.FreePortForProcess(pid); } -static Status spawn_process(const char *progname, Connection *conn, +static Status spawn_process(const char *progname, const Socket *conn_socket, uint16_t gdb_port, uint16_t port_offset, const lldb_private::Args &args, const std::string &log_file, const StringRef log_channels) { Status error; - SharedSocket shared_socket(conn, error); + SharedSocket shared_socket(conn_socket, error); if (error.Fail()) return error; @@ -363,7 +261,7 @@ int main_platform(int argc, char *argv[]) { StringRef log_channels; // e.g. "lldb process threads:gdb-remote default:linux all" - shared_fd_t fd = kInvalidSharedFD; + shared_fd_t fd = SharedSocket::kInvalidFD; int min_gdbserver_port = 0; int max_gdbserver_port = 0; @@ -480,7 +378,7 @@ int main_platform(int argc, char *argv[]) { } // Print usage and exit if no listening port is specified. - if (listen_host_port.empty() && fd == kInvalidSharedFD) + if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) show_usage = true; if (show_usage || option_error) { @@ -494,7 +392,7 @@ int main_platform(int argc, char *argv[]) { lldb_private::Args inferior_arguments; inferior_arguments.SetArguments(argc, const_cast(argv)); - if (fd != kInvalidSharedFD) { + if (fd != SharedSocket::kInvalidFD) { // Child process will handle the connection and exit. 
Log *log = GetLog(LLDBLog::Platform); if (!listen_host_port.empty()) { @@ -510,13 +408,14 @@ int main_platform(int argc, char *argv[]) { return socket_error; } - Connection *conn = - new ConnectionFileDescriptor(new TCPSocket(socket, true, false)); GDBRemoteCommunicationServerPlatform platform(Socket::ProtocolTcp, "tcp"); if (port_offset > 0) platform.SetPortOffset(port_offset); platform.SetPortMap(std::move(gdbserver_portmap)); - platform.SetConnection(std::unique_ptr(conn)); + platform.SetConnection( + std::unique_ptr(new ConnectionFileDescriptor( + new TCPSocket(socket, /*should_close=*/true, + /*child_processes_inherit=*/false)))); client_handle(platform, inferior_arguments); return 0; } @@ -578,8 +477,11 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "no available gdbserver port for connection - dropping...\n"); } else { - error = spawn_process(progname, conn, *available_port, port_offset, - inferior_arguments, log_file, log_channels); + const Socket *conn_socket = + static_cast(conn->GetReadObject().get()); + error = + spawn_process(progname, conn_socket, *available_port, port_offset, + inferior_arguments, log_file, log_channels); if (error.Fail()) { { diff --git a/lldb/unittests/Expression/ClangParserTest.cpp b/lldb/unittests/Expression/ClangParserTest.cpp index 6f682f6c97fdb..fab4487c73719 100644 --- a/lldb/unittests/Expression/ClangParserTest.cpp +++ b/lldb/unittests/Expression/ClangParserTest.cpp @@ -42,8 +42,8 @@ TEST_F(ClangHostTest, ComputeClangResourceDirectory) { #else std::string path_to_liblldb = "C:\\foo\\bar\\lib\\"; #endif - std::string path_to_clang_dir = clang::driver::Driver::GetResourcesPath( - path_to_liblldb + "liblldb", CLANG_RESOURCE_DIR); + std::string path_to_clang_dir = + clang::driver::Driver::GetResourcesPath(path_to_liblldb + "liblldb"); llvm::SmallString<256> path_to_clang_lib_dir_real; llvm::sys::fs::real_path(path_to_clang_dir, path_to_clang_lib_dir_real); diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 445980a18e7e9..4887ae6ee9966 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2189,6 +2189,10 @@ example: ``nosanitize_coverage`` This attribute indicates that SanitizerCoverage instrumentation is disabled for this function. +``nosanitize_realtime`` + This attribute indicates that the Realtime Sanitizer instrumentation is + disabled for this function. + This attribute is incompatible with the ``sanitize_realtime`` attribute. ``null_pointer_is_valid`` If ``null_pointer_is_valid`` is set, then the ``null`` address in address-space 0 is considered to be a valid address for memory loads and @@ -2315,6 +2319,7 @@ example: This attribute indicates that RealtimeSanitizer checks (realtime safety analysis - no allocations, syscalls or exceptions) are enabled for this function. + This attribute is incompatible with the ``nosanitize_realtime`` attribute. ``speculative_load_hardening`` This attribute indicates that `Speculative Load Hardening `_ diff --git a/llvm/docs/Statepoints.rst b/llvm/docs/Statepoints.rst index ed137ed4fa8ed..27a46a43b5190 100644 --- a/llvm/docs/Statepoints.rst +++ b/llvm/docs/Statepoints.rst @@ -167,10 +167,11 @@ Let's consider a simple call in LLVM IR: .. 
code-block:: llvm - define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) + declare void @foo() + define ptr addrspace(1) @test1(ptr addrspace(1) %obj) gc "statepoint-example" { - call void ()* @foo() - ret i8 addrspace(1)* %obj + call void @foo() + ret ptr addrspace(1) %obj } Depending on our language we may need to allow a safepoint during the execution @@ -186,11 +187,11 @@ resulting relocation sequence is: .. code-block:: llvm - define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) + define ptr addrspace(1) @test(ptr addrspace(1) %obj) gc "statepoint-example" { - %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7) - ret i8 addrspace(1)* %obj.relocated + %safepoint = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, ptr elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %obj)] + %obj.relocated = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint, i32 0, i32 0) + ret ptr addrspace(1) %obj.relocated } Ideally, this sequence would have been represented as a M argument, N @@ -269,10 +270,13 @@ collector: .. code-block:: llvm - define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) - gc "statepoint-example" { - call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - ret i8 addrspace(1)* %obj + define void @manual_frame(ptr %a, ptr %b) gc "statepoint-example" { + %alloca = alloca ptr + %allocb = alloca ptr + store ptr %a, ptr %alloca + store ptr %b, ptr %allocb + call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr %alloca, ptr %allocb)] + ret void } Recording On Stack Regions @@ -332,25 +336,6 @@ lowering both the base and derived pointer operands are required to be live over the associated call safepoint even if the base is otherwise unused afterwards. -If we extend our previous example to include a pointless derived pointer, -we get: - -.. code-block:: llvm - - define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) - gc "statepoint-example" { - %gep = getelementptr i8, i8 addrspace(1)* %obj, i64 20000 - %token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj, i8 addrspace(1)* %gep) - %obj.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 7) - %gep.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 8) - %p = getelementptr i8, i8 addrspace(1)* %gep, i64 -20000 - ret i8 addrspace(1)* %p - } - -Note that in this example %p and %obj.relocate are the same address and we -could replace one with the other, potentially removing the derived pointer -from the live set at the safepoint entirely. - .. _gc_transition_args: GC Transitions @@ -564,21 +549,20 @@ As an example, given this code: .. 
code-block:: llvm - define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) + define ptr addrspace(1) @test1(ptr addrspace(1) %obj) gc "statepoint-example" { call void @foo() - ret i8 addrspace(1)* %obj + ret ptr addrspace(1) %obj } The pass would produce this IR: .. code-block:: llvm - define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) - gc "statepoint-example" { - %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 12, i32 12) - ret i8 addrspace(1)* %obj.relocated + define ptr addrspace(1) @test_rs4gc(ptr addrspace(1) %obj) gc "statepoint-example" { + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(ptr addrspace(1) %obj) ] + %obj.relocated = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %statepoint_token, i32 0, i32 0) ; (%obj, %obj) + ret ptr addrspace(1) %obj.relocated } In the above examples, the addrspace(1) marker on the pointers is the mechanism diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst index d2afa3913ee07..31194e8b0389c 100644 --- a/llvm/docs/WritingAnLLVMPass.rst +++ b/llvm/docs/WritingAnLLVMPass.rst @@ -489,7 +489,7 @@ spanning the range from ``DominatorSet`` to ``BreakCriticalEdges``. Requiring edges in the CFG when your pass has been run. Some analyses chain to other analyses to do their job. For example, an -`AliasAnalysis ` implementation is required to :ref:`chain +`AliasAnalysis `_ implementation is required to :ref:`chain ` to other alias analysis passes. In cases where analyses chain, the ``addRequiredTransitive`` method should be used instead of the ``addRequired`` method. 
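As a rough illustration of the chaining rule described above (an illustrative sketch, not part of this change; the pass and its name are hypothetical), a legacy-PM pass that keeps querying alias analysis after its run method returns would declare the dependency with ``addRequiredTransitive``:

.. code-block:: c++

   #include "llvm/Analysis/AliasAnalysis.h"
   #include "llvm/IR/Function.h"
   #include "llvm/Pass.h"
   using namespace llvm;

   namespace {
   // Hypothetical pass that stores AA results and queries them lazily, after
   // its runOnFunction has returned.
   struct AAChainingExample : FunctionPass {
     static char ID;
     AAChainingExample() : FunctionPass(ID) {}

     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
       // addRequiredTransitive (not addRequired) keeps the alias analysis
       // passes alive for as long as this pass's results may be queried.
       AU.addRequiredTransitive<AAResultsWrapperPass>();
     }

     bool runOnFunction(Function &F) override {
       AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
       (void)AA; // Alias queries would go through AA here.
       return false;
     }
   };
   char AAChainingExample::ID = 0;
   } // namespace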
This informs the ``PassManager`` that the diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h index dd7fc6dc74864..6f172504b3c16 100644 --- a/llvm/include/llvm/ADT/STLFunctionalExtras.h +++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h @@ -69,6 +69,10 @@ class function_ref { } explicit operator bool() const { return callback; } + + bool operator==(const function_ref &Other) const { + return callable == Other.callable; + } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index d9b4e968fe3e9..6b577c02f0545 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -164,6 +164,10 @@ class ResourceInfo { UAVFlags.HasCounter = HasCounter; UAVFlags.IsROV = IsROV; } + const UAVInfo &getUAV() const { + assert(isUAV() && "Not a UAV"); + return UAVFlags; + } void setCBuffer(uint32_t Size) { assert(isCBuffer() && "Not a CBuffer"); CBufferSize = Size; @@ -179,6 +183,10 @@ class ResourceInfo { Typed.ElementTy = ElementTy; Typed.ElementCount = ElementCount; } + const TypedInfo &getTyped() const { + assert(isTyped() && "Not typed"); + return Typed; + } void setFeedback(dxil::SamplerFeedbackType Type) { assert(isFeedback() && "Not Feedback"); Feedback.Type = Type; @@ -187,8 +195,14 @@ class ResourceInfo { assert(isMultiSample() && "Not MultiSampled"); MultiSample.Count = Count; } + const MSInfo &getMultiSample() const { + assert(isMultiSample() && "Not MultiSampled"); + return MultiSample; + } + StringRef getName() const { return Name; } dxil::ResourceClass getResourceClass() const { return RC; } + dxil::ResourceKind getResourceKind() const { return Kind; } bool operator==(const ResourceInfo &RHS) const; bool operator!=(const ResourceInfo &RHS) const { return !(*this == RHS); } @@ -280,6 +294,46 @@ class DXILResourceMap { : (Resources.begin() + Pos->second); } + iterator srv_begin() { return begin(); } + const_iterator srv_begin() const { return begin(); } + iterator srv_end() { return begin() + FirstUAV; } + const_iterator srv_end() const { return begin() + FirstUAV; } + iterator_range srvs() { return make_range(srv_begin(), srv_end()); } + iterator_range srvs() const { + return make_range(srv_begin(), srv_end()); + } + + iterator uav_begin() { return begin() + FirstUAV; } + const_iterator uav_begin() const { return begin() + FirstUAV; } + iterator uav_end() { return begin() + FirstCBuffer; } + const_iterator uav_end() const { return begin() + FirstCBuffer; } + iterator_range uavs() { return make_range(uav_begin(), uav_end()); } + iterator_range uavs() const { + return make_range(uav_begin(), uav_end()); + } + + iterator cbuffer_begin() { return begin() + FirstCBuffer; } + const_iterator cbuffer_begin() const { return begin() + FirstCBuffer; } + iterator cbuffer_end() { return begin() + FirstSampler; } + const_iterator cbuffer_end() const { return begin() + FirstSampler; } + iterator_range cbuffers() { + return make_range(cbuffer_begin(), cbuffer_end()); + } + iterator_range cbuffers() const { + return make_range(cbuffer_begin(), cbuffer_end()); + } + + iterator sampler_begin() { return begin() + FirstSampler; } + const_iterator sampler_begin() const { return begin() + FirstSampler; } + iterator sampler_end() { return end(); } + const_iterator sampler_end() const { return end(); } + iterator_range samplers() { + return make_range(sampler_begin(), sampler_end()); + } + iterator_range samplers() const { + return 
make_range(sampler_begin(), sampler_end()); + } + void print(raw_ostream &OS) const; }; diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index af72f6e0f90b1..ee447d3e4ebb6 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -15,7 +15,6 @@ #define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H #include "llvm/ADT/DenseSet.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -187,12 +186,7 @@ class FunctionPropertiesUpdater { static bool isUpdateValid(Function &F, const FunctionPropertiesInfo &FPI, FunctionAnalysisManager &FAM); - DominatorTree &getUpdatedDominatorTree(FunctionAnalysisManager &FAM) const; - DenseSet Successors; - - // Edges we might potentially need to remove from the dominator tree. - SmallVector DomTreeUpdates; }; } // namespace llvm #endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h index eada6a647763b..12b906ec9dd58 100644 --- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h +++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -44,7 +44,7 @@ class UnrolledInstAnalyzer : private InstVisitor { friend class InstVisitor; struct SimplifiedAddress { Value *Base = nullptr; - ConstantInt *Offset = nullptr; + APInt Offset; }; public: diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 5154e2f6659c1..fe46a504bce5d 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -887,8 +887,8 @@ class ScalarEvolution { /// SCEV predicates to Predicates that are required to be true in order for /// the answer to be correct. Predicates can be checked with run-time /// checks and can be used to perform loop versioning. - const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, - SmallVector &Predicates); + const SCEV *getPredicatedBackedgeTakenCount( + const Loop *L, SmallVectorImpl &Predicates); /// When successful, this returns a SCEVConstant that is greater than or equal /// to (i.e. a "conservative over-approximation") of the value returend by @@ -911,7 +911,7 @@ class ScalarEvolution { /// the answer to be correct. Predicates can be checked with run-time /// checks and can be used to perform loop versioning. const SCEV *getPredicatedSymbolicMaxBackedgeTakenCount( - const Loop *L, SmallVector &Predicates); + const Loop *L, SmallVectorImpl &Predicates); /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. @@ -1556,8 +1556,9 @@ class ScalarEvolution { /// If we allowed SCEV predicates to be generated when populating this /// vector, this information can contain them and therefore a /// SCEVPredicate argument should be added to getExact. - const SCEV *getExact(const Loop *L, ScalarEvolution *SE, - SmallVector *Predicates = nullptr) const; + const SCEV *getExact( + const Loop *L, ScalarEvolution *SE, + SmallVectorImpl *Predicates = nullptr) const; /// Return the number of times this loop exit may fall through to the back /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via @@ -1574,9 +1575,9 @@ class ScalarEvolution { ScalarEvolution *SE) const; /// Get the symbolic max backedge taken count for the loop. 
- const SCEV * - getSymbolicMax(const Loop *L, ScalarEvolution *SE, - SmallVector *Predicates = nullptr); + const SCEV *getSymbolicMax( + const Loop *L, ScalarEvolution *SE, + SmallVectorImpl *Predicates = nullptr); /// Get the symbolic max backedge taken count for the particular loop exit. const SCEV *getSymbolicMax(const BasicBlock *ExitingBlock, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 4beac37a58344..8a2e6583af87c 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -759,6 +759,7 @@ enum AttributeKindCodes { ATTR_KIND_INITIALIZES = 94, ATTR_KIND_HYBRID_PATCHABLE = 95, ATTR_KIND_SANITIZE_REALTIME = 96, + ATTR_KIND_NO_SANITIZE_REALTIME = 97, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 932a2a94ab1f1..d5a63c8dd627a 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -81,16 +81,16 @@ class CCValAssign { } public: - static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom = false) { CCValAssign Ret(HTP, ValNo, ValVT, LocVT, IsCustom); - Ret.Data = Register(RegNo); + Ret.Data = Register(Reg); return Ret; } - static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP) { - return getReg(ValNo, ValVT, RegNo, LocVT, HTP, /*IsCustom=*/true); + return getReg(ValNo, ValVT, Reg, LocVT, HTP, /*IsCustom=*/true); } static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, @@ -112,7 +112,7 @@ class CCValAssign { return Ret; } - void convertToReg(unsigned RegNo) { Data = Register(RegNo); } + void convertToReg(MCRegister Reg) { Data = Register(Reg); } void convertToMem(int64_t Offset) { Data = Offset; } @@ -346,7 +346,7 @@ class CCState { /// AllocateReg - Attempt to allocate one of the specified registers. If none /// are available, return zero. Otherwise, return the first one available, /// marking it and any aliases as allocated. - MCPhysReg AllocateReg(ArrayRef Regs) { + MCRegister AllocateReg(ArrayRef Regs) { unsigned FirstUnalloc = getFirstUnallocated(Regs); if (FirstUnalloc == Regs.size()) return MCRegister(); // Didn't find the reg. diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index b73850bb757ec..89d1b5edf3fa6 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -253,8 +253,8 @@ class LiveVariables { return false; bool Removed = false; - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + for (MachineOperand &MO : MI.all_defs()) { + if (MO.getReg() == Reg) { MO.setIsDead(false); Removed = true; break; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 882cadea22369..b8a3c2ac2ac83 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1018,10 +1018,16 @@ class TargetInstrInfo : public MCInstrInfo { /// The source and destination registers may overlap, which may require a /// careful implementation when multiple copy instructions are required for /// large registers. See for example the ARM target. 
+ /// + /// If RenamableDest is true, the copy instruction's destination operand is + /// marked renamable. + /// If RenamableSrc is true, the copy instruction's source operand is + /// marked renamable. virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, - MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const { + MCRegister DestReg, MCRegister SrcReg, bool KillSrc, + bool RenamableDest = false, + bool RenamableSrc = false) const { llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); } diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 891e34fec0c79..80936c0ee8335 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -212,6 +212,9 @@ def NoSanitizeBounds : EnumAttr<"nosanitize_bounds", [FnAttr]>; /// No SanitizeCoverage instrumentation. def NoSanitizeCoverage : EnumAttr<"nosanitize_coverage", [FnAttr]>; +/// No SanitizeRealtime instrumentation. +def NoSanitizeRealtime : EnumAttr<"nosanitize_realtime", [FnAttr]>; + /// Null pointer in address space zero is valid. def NullPointerIsValid : EnumAttr<"null_pointer_is_valid", [FnAttr]>; diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h index 21c4a3eaa5ac2..8c508492f704a 100644 --- a/llvm/include/llvm/IR/Statepoint.h +++ b/llvm/include/llvm/IR/Statepoint.h @@ -176,23 +176,23 @@ class GCStatepointInst : public CallBase { } /// Returns an iterator to the begining of the argument range describing gc - /// values for the statepoint. - const_op_iterator gc_args_begin() const { + /// live values for the statepoint. + const_op_iterator gc_live_begin() const { if (auto Opt = getOperandBundle(LLVMContext::OB_gc_live)) return Opt->Inputs.begin(); return arg_end(); } - /// Return an end iterator for the gc argument range - const_op_iterator gc_args_end() const { + /// Return an end iterator for the gc live range + const_op_iterator gc_live_end() const { if (auto Opt = getOperandBundle(LLVMContext::OB_gc_live)) return Opt->Inputs.end(); return arg_end(); } - /// range adapter for gc arguments - iterator_range gc_args() const { - return make_range(gc_args_begin(), gc_args_end()); + /// range adapter for gc live arguments + iterator_range gc_live() const { + return make_range(gc_live_begin(), gc_live_end()); } diff --git a/llvm/include/llvm/MC/MCInst.h b/llvm/include/llvm/MC/MCInst.h index 578b7328970b7..b3d615b4392f5 100644 --- a/llvm/include/llvm/MC/MCInst.h +++ b/llvm/include/llvm/MC/MCInst.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" +#include "llvm/MC/MCRegister.h" #include "llvm/Support/SMLoc.h" #include #include @@ -66,15 +67,15 @@ class MCOperand { bool isInst() const { return Kind == kInst; } /// Returns the register number. - unsigned getReg() const { + MCRegister getReg() const { assert(isReg() && "This is not a register operand!"); return RegVal; } /// Set the register number. 
- void setReg(unsigned Reg) { + void setReg(MCRegister Reg) { assert(isReg() && "This is not a register operand!"); - RegVal = Reg; + RegVal = Reg.id(); } int64_t getImm() const { @@ -131,10 +132,10 @@ class MCOperand { InstVal = Val; } - static MCOperand createReg(unsigned Reg) { + static MCOperand createReg(MCRegister Reg) { MCOperand Op; Op.Kind = kRegister; - Op.RegVal = Reg; + Op.RegVal = Reg.id(); return Op; } diff --git a/llvm/include/llvm/MC/MCInstBuilder.h b/llvm/include/llvm/MC/MCInstBuilder.h index d06ed4c6c840a..de45ffb4b2dc7 100644 --- a/llvm/include/llvm/MC/MCInstBuilder.h +++ b/llvm/include/llvm/MC/MCInstBuilder.h @@ -34,7 +34,7 @@ class MCInstBuilder { } /// Add a new register operand. - MCInstBuilder &addReg(unsigned Reg) { + MCInstBuilder &addReg(MCRegister Reg) { Inst.addOperand(MCOperand::createReg(Reg)); return *this; } diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 13ad1c38f3b3b..32905c1e9a424 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -54,20 +54,21 @@ #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/IR/PseudoProbe.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorOr.h" -#include -#include +#include #include #include #include #include #include -#include #include namespace llvm { @@ -86,7 +87,7 @@ enum class MCPseudoProbeFlag { struct MCPseudoProbeFuncDesc { uint64_t FuncGUID = 0; uint64_t FuncHash = 0; - std::string FuncName; + StringRef FuncName; MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; @@ -100,17 +101,24 @@ class MCDecodedPseudoProbe; using InlineSite = std::tuple; using MCPseudoProbeInlineStack = SmallVector; // GUID to PseudoProbeFuncDesc map -using GUIDProbeFunctionMap = - std::unordered_map; -// Address to pseudo probes map. -using AddressProbesMap = std::map>; +class GUIDProbeFunctionMap : public std::vector { +public: + auto find(uint64_t GUID) const { + auto CompareDesc = [](const MCPseudoProbeFuncDesc &Desc, uint64_t GUID) { + return Desc.FuncGUID < GUID; + }; + auto It = llvm::lower_bound(*this, GUID, CompareDesc); + if (It->FuncGUID != GUID) + return end(); + return It; + } +}; class MCDecodedPseudoProbeInlineTree; class MCPseudoProbeBase { protected: - uint64_t Guid; - uint64_t Index; + uint32_t Index; uint32_t Discriminator; uint8_t Attributes; uint8_t Type; @@ -120,14 +128,12 @@ class MCPseudoProbeBase { const static uint32_t PseudoProbeFirstId = 1; public: - MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D) - : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {} + MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D) + : Index(I), Discriminator(D), Attributes(At), Type(T) {} bool isEntry() const { return Index == PseudoProbeFirstId; } - uint64_t getGuid() const { return Guid; } - - uint64_t getIndex() const { return Index; } + uint32_t getIndex() const { return Index; } uint32_t getDiscriminator() const { return Discriminator; } @@ -157,18 +163,20 @@ class MCPseudoProbeBase { /// uses an address from a temporary label created at the current address in the /// current section. 
class MCPseudoProbe : public MCPseudoProbeBase { + uint64_t Guid; MCSymbol *Label; public: MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attributes, uint32_t Discriminator) - : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator), + : MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid), Label(Label) { assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); assert(Attributes <= 0xFF && "Probe attributes too big to encode, exceeding 2^16"); } + uint64_t getGuid() const { return Guid; }; MCSymbol *getLabel() const { return Label; } void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; }; @@ -181,11 +189,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { MCDecodedPseudoProbeInlineTree *InlineTree; public: - MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, - uint8_t At, uint32_t D, - MCDecodedPseudoProbeInlineTree *Tree) - : MCPseudoProbeBase(G, I, At, static_cast(K), D), Address(Ad), + MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At, + uint32_t D, MCDecodedPseudoProbeInlineTree *Tree) + : MCPseudoProbeBase(I, At, static_cast(K), D), Address(Ad), InlineTree(Tree){}; + uint64_t getGuid() const; uint64_t getAddress() const { return Address; } @@ -211,21 +219,39 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; -template -class MCPseudoProbeInlineTreeBase { - struct InlineSiteHash { - uint64_t operator()(const InlineSite &Site) const { - return std::get<0>(Site) ^ std::get<1>(Site); - } - }; +// Address to pseudo probes map. +class AddressProbesMap + : public std::vector> { + auto getIt(uint64_t Addr) const { + auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) { + return Probe.getAddress() < Addr; + }; + return llvm::lower_bound(*this, Addr, CompareProbe); + } +public: + // Returns range of probes within [\p From, \p To) address range. + auto find(uint64_t From, uint64_t To) const { + return llvm::make_range(getIt(From), getIt(To)); + } + // Returns range of probes with given \p Address. + auto find(uint64_t Address) const { + auto FromIt = getIt(Address); + if (FromIt == end() || FromIt->get().getAddress() != Address) + return llvm::make_range(end(), end()); + auto ToIt = getIt(Address + 1); + return llvm::make_range(FromIt, ToIt); + } +}; + +template +class MCPseudoProbeInlineTreeBase { protected: // Track children (e.g. inlinees) of current context - using InlinedProbeTreeMap = std::unordered_map< - InlineSite, std::unique_ptr, InlineSiteHash>; InlinedProbeTreeMap Children; // Set of probes that come with the function. 
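A minimal usage sketch for the range-based AddressProbesMap::find interface introduced above (illustrative only; the helper function and its setup are assumptions, not part of this patch):

    #include "llvm/MC/MCPseudoProbe.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    using namespace llvm;

    // Hypothetical helper: print every decoded probe whose address falls in
    // the half-open range [FuncAddr, FuncAddr + FuncSize).
    static void printProbesInRange(MCPseudoProbeDecoder &Decoder,
                                   uint64_t FuncAddr, uint64_t FuncSize) {
      AddressProbesMap &Map = Decoder.getAddress2ProbesMap();
      // find(From, To) relies on the map being sorted by address and returns
      // the probes in [From, To) as a range of references.
      for (const MCDecodedPseudoProbe &Probe :
           Map.find(FuncAddr, FuncAddr + FuncSize))
        outs() << format_hex(Probe.getAddress(), 10) << " probe index "
               << Probe.getIndex() << "\n";
    }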
- std::vector Probes; + ProbesType Probes; MCPseudoProbeInlineTreeBase() { static_assert(std::is_base_of::value, @@ -240,12 +266,10 @@ class MCPseudoProbeInlineTreeBase { bool isRoot() const { return Guid == 0; } InlinedProbeTreeMap &getChildren() { return Children; } const InlinedProbeTreeMap &getChildren() const { return Children; } - std::vector &getProbes() { return Probes; } - const std::vector &getProbes() const { return Probes; } - void addProbes(ProbeType Probe) { Probes.push_back(Probe); } + const ProbesType &getProbes() const { return Probes; } // Caller node of the inline site - MCPseudoProbeInlineTreeBase *Parent = - nullptr; + MCPseudoProbeInlineTreeBase *Parent = nullptr; DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { auto Ret = Children.emplace( Site, std::make_unique(Site)); @@ -259,9 +283,17 @@ class MCPseudoProbeInlineTreeBase { // instance is created as the root of a tree. // A real instance of this class is created for each function, either a // not inlined function that has code in .text section or an inlined function. +struct InlineSiteHash { + uint64_t operator()(const InlineSite &Site) const { + return std::get<0>(Site) ^ std::get<1>(Site); + } +}; class MCPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { + : public MCPseudoProbeInlineTreeBase< + std::vector, MCPseudoProbeInlineTree, + std::unordered_map, + InlineSiteHash>> { public: MCPseudoProbeInlineTree() = default; MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } @@ -277,16 +309,31 @@ class MCPseudoProbeInlineTree // inline tree node for the decoded pseudo probe class MCDecodedPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { -public: - InlineSite ISite; + : public MCPseudoProbeInlineTreeBase< + MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree, + MutableArrayRef> { + uint32_t NumProbes = 0; + uint32_t ProbeId = 0; +public: MCDecodedPseudoProbeInlineTree() = default; - MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + MCDecodedPseudoProbeInlineTree(const InlineSite &Site, + MCDecodedPseudoProbeInlineTree *Parent) + : ProbeId(std::get<1>(Site)) { + this->Guid = std::get<0>(Site); + this->Parent = Parent; + } // Return false if it's a dummy inline site bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } + InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); } + void setProbes(MutableArrayRef ProbesRef) { + Probes = ProbesRef.data(); + NumProbes = ProbesRef.size(); + } + auto getProbes() const { + return MutableArrayRef(Probes, NumProbes); + } }; /// Instances of this class represent the pseudo probes inserted into a compile @@ -336,9 +383,25 @@ class MCPseudoProbeTable { }; class MCPseudoProbeDecoder { + // Decoded pseudo probes vector. + std::vector PseudoProbeVec; + // Injected pseudo probes, identified by the containing inline tree node. + // Need to keep injected probes separately for two reasons: + // 1) Probes cannot be added to the PseudoProbeVec: appending may cause + // reallocation so that pointers to its elements will become invalid. + // 2) Probes belonging to function record must be contiguous in PseudoProbeVec + // as owning InlineTree references them with an ArrayRef to save space. + std::unordered_map> + InjectedProbeMap; + // Decoded inline records vector. + std::vector InlineTreeVec; + // GUID to PseudoProbeFuncDesc map. GUIDProbeFunctionMap GUID2FuncDescMap; + BumpPtrAllocator FuncNameAllocator; + // Address to probes map. 
AddressProbesMap Address2ProbesMap; @@ -370,16 +433,18 @@ class MCPseudoProbeDecoder { // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); + // Decode pseudo_probe section to count the number of probes and inlined + // function records for each function record. + template + bool countRecords(bool &Discard, uint32_t &ProbeCount, uint32_t &InlinedCount, + const Uint64Set &GuidFilter); + // Decode pseudo_probe section to build address to probes map for specifed // functions only. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, const Uint64Set &GuildFilter, const Uint64Map &FuncStartAddrs); - bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, const Uint64Set &GuildFilter, - const Uint64Map &FuncStartAddrs); - // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); @@ -422,6 +487,34 @@ class MCPseudoProbeDecoder { const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { return DummyInlineRoot; } + + void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) { + const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode(); + InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address); + } + + size_t + getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const { + auto It = InjectedProbeMap.find(Parent); + if (It == InjectedProbeMap.end()) + return 0; + return It->second.size(); + } + + auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) { + auto It = InjectedProbeMap.find(Parent); + assert(It != InjectedProbeMap.end()); + return iterator_range(It->second); + } + +private: + // Recursively parse an inlining tree encoded in pseudo_probe section. Returns + // whether the the top-level node should be skipped. + template + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex); }; } // end namespace llvm diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index b8a28669cdd07..e4c48e39a4001 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -111,6 +111,8 @@ class ConstantInt; class Context; class Function; class Instruction; +class FreezeInst; +class FenceInst; class SelectInst; class ExtractElementInst; class InsertElementInst; @@ -127,6 +129,7 @@ class CallBase; class CallInst; class InvokeInst; class CallBrInst; +class LandingPadInst; class FuncletPadInst; class CatchPadInst; class CleanupPadInst; @@ -134,6 +137,7 @@ class CatchReturnInst; class CleanupReturnInst; class GetElementPtrInst; class CastInst; +class PossiblyNonNegInst; class PtrToIntInst; class BitCastInst; class AllocaInst; @@ -141,6 +145,7 @@ class CatchSwitchInst; class SwitchInst; class UnaryOperator; class BinaryOperator; +class PossiblyDisjointInst; class AtomicRMWInst; class AtomicCmpXchgInst; @@ -249,6 +254,8 @@ class Value { friend class Context; // For getting `Val`. friend class User; // For getting `Val`. friend class Use; // For getting `Val`. + friend class FreezeInst; // For getting `Val`. + friend class FenceInst; // For getting `Val`. friend class SelectInst; // For getting `Val`. friend class ExtractElementInst; // For getting `Val`. friend class InsertElementInst; // For getting `Val`. 
@@ -261,6 +268,7 @@ class Value { friend class CallInst; // For getting `Val`. friend class InvokeInst; // For getting `Val`. friend class CallBrInst; // For getting `Val`. + friend class LandingPadInst; // For getting `Val`. friend class FuncletPadInst; // For getting `Val`. friend class CatchPadInst; // For getting `Val`. friend class CleanupPadInst; // For getting `Val`. @@ -678,6 +686,8 @@ class Instruction : public sandboxir::User { /// A SandboxIR Instruction may map to multiple LLVM IR Instruction. This /// returns its topmost LLVM IR instruction. llvm::Instruction *getTopmostLLVMInstruction() const; + friend class FreezeInst; // For getTopmostLLVMInstruction(). + friend class FenceInst; // For getTopmostLLVMInstruction(). friend class SelectInst; // For getTopmostLLVMInstruction(). friend class ExtractElementInst; // For getTopmostLLVMInstruction(). friend class InsertElementInst; // For getTopmostLLVMInstruction(). @@ -689,6 +699,7 @@ class Instruction : public sandboxir::User { friend class CallInst; // For getTopmostLLVMInstruction(). friend class InvokeInst; // For getTopmostLLVMInstruction(). friend class CallBrInst; // For getTopmostLLVMInstruction(). + friend class LandingPadInst; // For getTopmostLLVMInstruction(). friend class CatchPadInst; // For getTopmostLLVMInstruction(). friend class CleanupPadInst; // For getTopmostLLVMInstruction(). friend class CatchReturnInst; // For getTopmostLLVMInstruction(). @@ -882,6 +893,33 @@ template class SingleLLVMInstructionImpl : public Instruction { #endif }; +class FenceInst : public SingleLLVMInstructionImpl { + FenceInst(llvm::FenceInst *FI, Context &Ctx) + : SingleLLVMInstructionImpl(ClassID::Fence, Opcode::Fence, FI, Ctx) {} + friend Context; // For constructor; + +public: + static FenceInst *create(AtomicOrdering Ordering, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + SyncScope::ID SSID = SyncScope::System); + /// Returns the ordering constraint of this fence instruction. + AtomicOrdering getOrdering() const { + return cast(Val)->getOrdering(); + } + /// Sets the ordering constraint of this fence instruction. May only be + /// Acquire, Release, AcquireRelease, or SequentiallyConsistent. + void setOrdering(AtomicOrdering Ordering); + /// Returns the synchronization scope ID of this fence instruction. + SyncScope::ID getSyncScopeID() const { + return cast(Val)->getSyncScopeID(); + } + /// Sets the synchronization scope ID of this fence instruction. + void setSyncScopeID(SyncScope::ID SSID); + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::Fence; + } +}; + class SelectInst : public SingleLLVMInstructionImpl { /// Use Context::createSelectInst(). Don't call the /// constructor directly. @@ -1495,13 +1533,26 @@ class UnaryInstruction public: static bool classof(const Instruction *I) { - return isa(I) || isa(I); + return isa(I) || isa(I) || isa(I); } static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; +class FreezeInst : public UnaryInstruction { + FreezeInst(llvm::FreezeInst *FI, Context &Ctx) + : UnaryInstruction(ClassID::Freeze, Opcode::Freeze, FI, Ctx) {} + friend Context; // For constructor; + +public: + static FreezeInst *create(Value *V, BBIterator WhereIt, BasicBlock *WhereBB, + Context &Ctx, const Twine &Name = ""); + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::Freeze; + } +}; + class LoadInst final : public UnaryInstruction { /// Use LoadInst::create() instead of calling the constructor. 
LoadInst(llvm::LoadInst *LI, Context &Ctx) @@ -1799,8 +1850,7 @@ class InvokeInst final : public CallBase { BasicBlock *getUnwindDest() const; void setNormalDest(BasicBlock *BB); void setUnwindDest(BasicBlock *BB); - // TODO: Return a `LandingPadInst` once implemented. - Instruction *getLandingPadInst() const; + LandingPadInst *getLandingPadInst() const; BasicBlock *getSuccessor(unsigned SuccIdx) const; void setSuccessor(unsigned SuccIdx, BasicBlock *NewSucc) { assert(SuccIdx < 2 && "Successor # out of range for invoke!"); @@ -1858,6 +1908,50 @@ class CallBrInst final : public CallBase { } }; +class LandingPadInst : public SingleLLVMInstructionImpl { + LandingPadInst(llvm::LandingPadInst *LP, Context &Ctx) + : SingleLLVMInstructionImpl(ClassID::LandingPad, Opcode::LandingPad, LP, + Ctx) {} + friend class Context; // For constructor. + +public: + static LandingPadInst *create(Type *RetTy, unsigned NumReservedClauses, + BBIterator WhereIt, BasicBlock *WhereBB, + Context &Ctx, const Twine &Name = ""); + /// Return 'true' if this landingpad instruction is a + /// cleanup. I.e., it should be run when unwinding even if its landing pad + /// doesn't catch the exception. + bool isCleanup() const { + return cast(Val)->isCleanup(); + } + /// Indicate that this landingpad instruction is a cleanup. + void setCleanup(bool V); + + // TODO: We are not implementing addClause() because we have no way to revert + // it for now. + + /// Get the value of the clause at index Idx. Use isCatch/isFilter to + /// determine what type of clause this is. + Constant *getClause(unsigned Idx) const; + + /// Return 'true' if the clause and index Idx is a catch clause. + bool isCatch(unsigned Idx) const { + return cast(Val)->isCatch(Idx); + } + /// Return 'true' if the clause and index Idx is a filter clause. + bool isFilter(unsigned Idx) const { + return cast(Val)->isFilter(Idx); + } + /// Get the number of clauses for this landing pad. + unsigned getNumClauses() const { + return cast(Val)->getNumOperands(); + } + // TODO: We are not implementing reserveClauses() because we can't revert it. + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::LandingPad; + } +}; + class FuncletPadInst : public SingleLLVMInstructionImpl { FuncletPadInst(ClassID SubclassID, Opcode Opc, llvm::Instruction *I, Context &Ctx) @@ -2282,6 +2376,7 @@ class UnaryOperator : public UnaryInstruction { }; class BinaryOperator : public SingleLLVMInstructionImpl { +protected: static Opcode getBinOpOpcode(llvm::Instruction::BinaryOps BinOp) { switch (BinOp) { case llvm::Instruction::Add: @@ -2361,6 +2456,22 @@ class BinaryOperator : public SingleLLVMInstructionImpl { void swapOperands() { swapOperandsInternal(0, 1); } }; +/// An or instruction, which can be marked as "disjoint", indicating that the +/// inputs don't have a 1 in the same bit position. Meaning this instruction +/// can also be treated as an add. +class PossiblyDisjointInst : public BinaryOperator { +public: + void setIsDisjoint(bool B); + bool isDisjoint() const { + return cast(Val)->isDisjoint(); + } + /// For isa/dyn_cast. 
+ static bool classof(const Value *From) { + return isa(From) && + cast(From)->getOpcode() == Opcode::Or; + } +}; + class AtomicRMWInst : public SingleLLVMInstructionImpl { AtomicRMWInst(llvm::AtomicRMWInst *Atomic, Context &Ctx) : SingleLLVMInstructionImpl(ClassID::AtomicRMW, @@ -2652,6 +2763,28 @@ class CastInst : public UnaryInstruction { Type *getDestTy() const { return cast(Val)->getDestTy(); } }; +/// Instruction that can have a nneg flag (zext/uitofp). +class PossiblyNonNegInst : public CastInst { +public: + bool hasNonNeg() const { + return cast(Val)->hasNonNeg(); + } + void setNonNeg(bool B); + /// For isa/dyn_cast. + static bool classof(const Value *From) { + if (auto *I = dyn_cast(From)) { + switch (I->getOpcode()) { + case Opcode::ZExt: + case Opcode::UIToFP: + return true; + default: + return false; + } + } + return false; + } +}; + // Helper class to simplify stamping out CastInst subclasses. template class CastInstImpl : public CastInst { public: @@ -2854,6 +2987,10 @@ class Context { IRBuilder LLVMIRBuilder; auto &getLLVMIRBuilder() { return LLVMIRBuilder; } + FreezeInst *createFreezeInst(llvm::FreezeInst *SI); + friend FreezeInst; // For createFreezeInst() + FenceInst *createFenceInst(llvm::FenceInst *SI); + friend FenceInst; // For createFenceInst() SelectInst *createSelectInst(llvm::SelectInst *SI); friend SelectInst; // For createSelectInst() InsertElementInst *createInsertElementInst(llvm::InsertElementInst *IEI); @@ -2876,6 +3013,8 @@ class Context { friend InvokeInst; // For createInvokeInst() CallBrInst *createCallBrInst(llvm::CallBrInst *I); friend CallBrInst; // For createCallBrInst() + LandingPadInst *createLandingPadInst(llvm::LandingPadInst *I); + friend LandingPadInst; // For createLandingPadInst() CatchPadInst *createCatchPadInst(llvm::CatchPadInst *I); friend CatchPadInst; // For createCatchPadInst() CleanupPadInst *createCleanupPadInst(llvm::CleanupPadInst *I); diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index 14cb2d72ad3af..8d79523253f23 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -37,6 +37,8 @@ DEF_USER(ConstantInt, ConstantInt) DEF_INSTR(Opaque, OP(Opaque), OpaqueInst) DEF_INSTR(ExtractElement, OP(ExtractElement), ExtractElementInst) DEF_INSTR(InsertElement, OP(InsertElement), InsertElementInst) +DEF_INSTR(Freeze, OP(Freeze), FreezeInst) +DEF_INSTR(Fence, OP(Fence), FenceInst) DEF_INSTR(ShuffleVector, OP(ShuffleVector), ShuffleVectorInst) DEF_INSTR(Select, OP(Select), SelectInst) DEF_INSTR(Br, OP(Br), BranchInst) @@ -46,6 +48,7 @@ DEF_INSTR(Ret, OP(Ret), ReturnInst) DEF_INSTR(Call, OP(Call), CallInst) DEF_INSTR(Invoke, OP(Invoke), InvokeInst) DEF_INSTR(CallBr, OP(CallBr), CallBrInst) +DEF_INSTR(LandingPad, OP(LandingPad), LandingPadInst) DEF_INSTR(CatchPad, OP(CatchPad), CatchPadInst) DEF_INSTR(CleanupPad, OP(CleanupPad), CleanupPadInst) DEF_INSTR(CatchRet, OP(CatchRet), CatchReturnInst) diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index e6e87eee2c6ba..a339946e67cf2 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -2328,8 +2328,8 @@ class HasReferenceResolver final : public Resolver { Init *resolve(Init *VarName) override; }; -void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS); -void EmitJSON(RecordKeeper &RK, raw_ostream &OS); +void EmitDetailedRecords(const RecordKeeper &RK, raw_ostream &OS); +void EmitJSON(const 
RecordKeeper &RK, raw_ostream &OS);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/TableGen/TableGenBackend.h b/llvm/include/llvm/TableGen/TableGenBackend.h
index 9c5a785f45a40..80134179850b0 100644
--- a/llvm/include/llvm/TableGen/TableGenBackend.h
+++ b/llvm/include/llvm/TableGen/TableGenBackend.h
@@ -13,9 +13,8 @@
 #ifndef LLVM_TABLEGEN_TABLEGENBACKEND_H
 #define LLVM_TABLEGEN_TABLEGENBACKEND_H
 
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/TableGen/Record.h"
 
 namespace llvm {
@@ -24,22 +23,19 @@ class RecordKeeper;
 class raw_ostream;
 
 namespace TableGen::Emitter {
-using FnT = void (*)(RecordKeeper &Records, raw_ostream &OS);
-
-struct OptCreatorT {
-  static void *call();
-};
-
-extern ManagedStatic<cl::opt<FnT>, OptCreatorT> Action;
+// Supports const and non-const forms of callback functions.
+using FnT = function_ref<void(RecordKeeper &Records, raw_ostream &OS)>;
 
+/// Creating an `Opt` object registers the command line option \p Name with
+/// the TableGen backend and associates the callback \p CB with that option.
+/// If \p ByDefault is true, then that callback is applied by default if no
+/// command line option was specified.
 struct Opt {
-  Opt(StringRef Name, FnT CB, StringRef Desc, bool ByDefault = false) {
-    if (ByDefault)
-      Action->setInitialValue(CB);
-    Action->getParser().addLiteralOption(Name, CB, Desc);
-  }
+  Opt(StringRef Name, FnT CB, StringRef Desc, bool ByDefault = false);
 };
 
+/// Convenience wrapper around `Opt` that registers `EmitterClass::run` as the
+/// callback.
 template <class EmitterC> class OptClass : Opt {
   static void run(RecordKeeper &RK, raw_ostream &OS) { EmitterC(RK).run(OS); }
 
@@ -47,6 +43,10 @@ template <class EmitterC> class OptClass : Opt {
   OptClass(StringRef Name, StringRef Desc) : Opt(Name, run, Desc) {}
 };
 
+/// Apply the callback for any command line option registered above. Returns
+/// false if no callback was applied.
+bool ApplyCallback(RecordKeeper &Records, raw_ostream &OS);
+
 } // namespace TableGen::Emitter
 
 /// emitSourceFileHeader - Output an LLVM style file header to the specified
@@ -54,6 +54,6 @@ template <class EmitterC> class OptClass : Opt {
 void emitSourceFileHeader(StringRef Desc, raw_ostream &OS,
                           const RecordKeeper &Record = RecordKeeper());
 
-} // End llvm namespace
+} // namespace llvm
 
-#endif
+#endif // LLVM_TABLEGEN_TABLEGENBACKEND_H
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 93d831c26938b..b5b969220df85 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -139,6 +139,10 @@ class FunctionImporter {
     maybeAddDeclaration(FromModule, GUID);
   }
 
+  // Return the list of source modules, sorted in ascending alphabetical
+  // order.
+  SmallVector<StringRef, 0> getSourceModules() const;
+
   const ImportMapTyImpl &getImportMap() const { return ImportMap; }
 
 private:
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 68eb00a50fe03..826347e79f719 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -187,7 +187,8 @@ class CodeExtractorAnalysisCache {
   /// sets, before extraction occurs. These modifications won't have any
   /// significant impact on the cost however.
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &Allocas) const; + const ValueSet &Allocas, + bool CollectGlobalInputs = false) const; /// Check if life time marker nodes can be hoisted/sunk into the outline /// region. diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp index 07906fa1aa6c6..6d6ec6c7b1cc7 100644 --- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp +++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp @@ -326,14 +326,6 @@ FunctionPropertiesUpdater::FunctionPropertiesUpdater( // with the CB BB ('Entry') between which the inlined callee will be pasted. Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB)); - // the outcome of the inlining may be that some edges get lost (DCEd BBs - // because inlining brought some constant, for example). We don't know which - // edges will be removed, so we list all of them as potentially removable. - for (auto *Succ : successors(&CallSiteBB)) - DomTreeUpdates.emplace_back(DominatorTree::UpdateKind::Delete, - const_cast(&CallSiteBB), - const_cast(Succ)); - // Inlining only handles invoke and calls. If this is an invoke, and inlining // it pulls another invoke, the original landing pad may get split, so as to // share its content with other potential users. So the edge up to which we @@ -344,11 +336,6 @@ FunctionPropertiesUpdater::FunctionPropertiesUpdater( if (const auto *II = dyn_cast(&CB)) { const auto *UnwindDest = II->getUnwindDest(); Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest)); - // Same idea as above, we pretend we lose all these edges. - for (auto *Succ : successors(UnwindDest)) - DomTreeUpdates.emplace_back(DominatorTree::UpdateKind::Delete, - const_cast(UnwindDest), - const_cast(Succ)); } // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). @@ -369,30 +356,6 @@ FunctionPropertiesUpdater::FunctionPropertiesUpdater( FPI.updateForBB(*BB, -1); } -DominatorTree &FunctionPropertiesUpdater::getUpdatedDominatorTree( - FunctionAnalysisManager &FAM) const { - auto &DT = - FAM.getResult(const_cast(Caller)); - - SmallVector FinalDomTreeUpdates; - - for (auto &Upd : DomTreeUpdates) - FinalDomTreeUpdates.push_back(Upd); - - DenseSet Inserted; - for (auto *Succ : successors(&CallSiteBB)) - if (Inserted.insert(Succ).second) - FinalDomTreeUpdates.push_back({DominatorTree::UpdateKind::Insert, - const_cast(&CallSiteBB), - const_cast(Succ)}); - - DT.applyUpdates(FinalDomTreeUpdates); -#ifdef EXPENSIVE_CHECKS - assert(DT.verify(DominatorTree::VerificationLevel::Full)); -#endif - return DT; -} - void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { // Update feature values from the BBs that were copied from the callee, or // might have been modified because of inlining. The latter have been @@ -420,7 +383,8 @@ void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { // remove E. 
SetVector Reinclude; SetVector Unreachable; - auto &DT = getUpdatedDominatorTree(FAM); + const auto &DT = + FAM.getResult(const_cast(Caller)); if (&CallSiteBB != &*Caller.begin()) Reinclude.insert(&*Caller.begin()); @@ -463,9 +427,6 @@ void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { const auto &LI = FAM.getResult(const_cast(Caller)); FPI.updateAggregateStats(Caller, LI); -#ifdef EXPENSIVE_CHECKS - assert(isUpdateValid(Caller, FPI, FAM)); -#endif } bool FunctionPropertiesUpdater::isUpdateValid(Function &F, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 8d53a27fb75eb..980f142f11326 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1937,6 +1937,27 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst << ": " << *Dist << "\n"); + // Check if we can prove that Sink only accesses memory after Src's end or + // vice versa. At the moment this is limited to cases where either source or + // sink are loop invariant to avoid compile-time increases. This is not + // required for correctness. + if (SE.isLoopInvariant(Src, InnermostLoop) || + SE.isLoopInvariant(Sink, InnermostLoop)) { + const auto &[SrcStart, SrcEnd] = + getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds); + const auto &[SinkStart, SinkEnd] = + getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds); + if (!isa(SrcStart) && + !isa(SrcEnd) && + !isa(SinkStart) && + !isa(SinkEnd)) { + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) + return MemoryDepChecker::Dependence::NoDep; + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) + return MemoryDepChecker::Dependence::NoDep; + } + } + // Need accesses with constant strides and the same direction for further // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and // similar code or pointer arithmetic that could wrap in the address space. @@ -1982,45 +2003,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx) { assert(AIdx < BIdx && "Must pass arguments in program order"); - // Check if we can prove that Sink only accesses memory after Src's end or - // vice versa. The helper is used to perform the checks only on the exit paths - // where it helps to improve the analysis result. - auto CheckCompletelyBeforeOrAfter = [&]() { - auto *APtr = A.getPointer(); - auto *BPtr = B.getPointer(); - - Type *ATy = getLoadStoreType(InstMap[AIdx]); - Type *BTy = getLoadStoreType(InstMap[BIdx]); - - const SCEV *Src = PSE.getSCEV(APtr); - const SCEV *Sink = PSE.getSCEV(BPtr); - - const auto &[SrcStart, SrcEnd] = - getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds); - if (isa(SrcStart) || isa(SrcEnd)) - return false; - - const auto &[SinkStart, SinkEnd] = - getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds); - if (isa(SinkStart) || - isa(SinkEnd)) - return false; - - auto &SE = *PSE.getSE(); - return SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart) || - SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart); - }; - // Get the dependence distance, stride, type size and what access writes for // the dependence between A and B. 
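Editor's note on the LoopAccessAnalysis hunk above: the "source ends before sink starts (or vice versa)" proof now runs inside getDependenceDistanceStrideAndSize and is only attempted when Src or Sink is loop invariant. The standalone sketch below shows just the interval-disjointness argument behind that NoDep early exit; `AccessRange` and `provablyDisjoint` are invented stand-ins for the (Start, End) SCEVs returned by getStartAndEndForAccess, not the SCEV-based implementation.

// Editor's sketch: if one access's last byte is <= the other's first byte,
// the two accesses cannot overlap, so there is no dependence to track.
#include <cstdint>
#include <iostream>

struct AccessRange {
  uint64_t Start; // address of the first byte accessed
  uint64_t End;   // one past the last byte accessed
};

// Returns true when the two byte ranges provably do not overlap.
bool provablyDisjoint(const AccessRange &Src, const AccessRange &Sink) {
  return Src.End <= Sink.Start || Sink.End <= Src.Start;
}

int main() {
  AccessRange Invariant{0x1000, 0x1004}; // e.g. a loop-invariant load
  AccessRange Strided{0x2000, 0x2400};   // e.g. A[i] over the whole loop
  std::cout << (provablyDisjoint(Invariant, Strided) ? "NoDep\n" : "Unknown\n");
}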
auto Res = getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]); - if (std::holds_alternative(Res)) { - if (std::get(Res) == Dependence::Unknown && - CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; + if (std::holds_alternative(Res)) return std::get(Res); - } auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); @@ -2029,9 +2017,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::optional CommonStride = StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt; if (isa(Dist)) { - if (CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; - // TODO: Relax requirement that there is a common stride to retry with // non-constant distance dependencies. FoundNonConstantDistanceDependence |= CommonStride.has_value(); @@ -2083,8 +2068,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Write to the same location with the same size. return Dependence::Forward; } - assert(!CheckCompletelyBeforeOrAfter() && - "unexpectedly proved no dependence"); LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " "different type sizes\n"); return Dependence::Unknown; @@ -2106,8 +2089,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // did not set it when strides were different but there is no inherent // reason to. FoundNonConstantDistanceDependence |= CommonStride.has_value(); - if (CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; return Dependence::Unknown; } if (!HasSameSize || @@ -2127,9 +2108,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Below we only handle strictly positive distances. if (MinDistance <= 0) { FoundNonConstantDistanceDependence |= CommonStride.has_value(); - if (CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; - return Dependence::Unknown; } @@ -2146,18 +2124,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, } if (!HasSameSize) { - if (CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " "different type sizes\n"); return Dependence::Unknown; } - if (!CommonStride) { - if (CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; + if (!CommonStride) return Dependence::Unknown; - } // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? @@ -2205,10 +2178,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // dependence distance and the distance may be larger at runtime (and safe // for vectorization). Classify it as Unknown, so we re-try with runtime // checks. - // - if (CheckCompletelyBeforeOrAfter()) - return Dependence::NoDep; - return Dependence::Unknown; } LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance " @@ -2221,8 +2190,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (MinDistanceNeeded > MinDepDistBytes) { LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least " << MinDistanceNeeded << " size in bytes\n"); - assert(!CheckCompletelyBeforeOrAfter() && - "unexpectedly proved no dependence"); return Dependence::Backward; } @@ -2270,8 +2237,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // For non-constant distances, we checked the lower bound of the dependence // distance and the distance may be larger at runtime (and safe for // vectorization). 
Classify it as Unknown, so we re-try with runtime checks. - assert(!CheckCompletelyBeforeOrAfter() && - "unexpectedly proved no dependence"); return Dependence::Unknown; } diff --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 7b6ca4d711fcd..8824cec86924f 100644 --- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -58,13 +58,13 @@ bool UnrolledInstAnalyzer::simplifyInstWithSCEV(Instruction *I) { auto *Base = dyn_cast(SE.getPointerBase(S)); if (!Base) return false; - auto *Offset = - dyn_cast(SE.getMinusSCEV(ValueAtIteration, Base)); + std::optional Offset = + SE.computeConstantDifference(ValueAtIteration, Base); if (!Offset) return false; SimplifiedAddress Address; Address.Base = Base->getValue(); - Address.Offset = Offset->getValue(); + Address.Offset = *Offset; SimplifiedAddresses[I] = Address; return false; } @@ -105,7 +105,7 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) { auto AddressIt = SimplifiedAddresses.find(AddrOp); if (AddressIt == SimplifiedAddresses.end()) return false; - ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset; + const APInt &SimplifiedAddrOp = AddressIt->second.Offset; auto *GV = dyn_cast(AddressIt->second.Base); // We're only interested in loads that can be completely folded to a @@ -125,9 +125,9 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) { return false; unsigned ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; - if (SimplifiedAddrOp->getValue().getActiveBits() > 64) + if (SimplifiedAddrOp.getActiveBits() > 64) return false; - int64_t SimplifiedAddrOpV = SimplifiedAddrOp->getSExtValue(); + int64_t SimplifiedAddrOpV = SimplifiedAddrOp.getSExtValue(); if (SimplifiedAddrOpV < 0) { // FIXME: For now we conservatively ignore out of bound accesses, but // we're allowed to perform the optimization in this case. 
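Editor's note on the LoopUnrollAnalyzer hunk above: the simplified address offset is now an APInt obtained from ScalarEvolution::computeConstantDifference rather than a ConstantInt, so visitLoad checks getActiveBits() before calling getSExtValue(). Below is a hedged sketch of just that guard, assuming only llvm/ADT/APInt.h is available; the surrounding analysis and the `foldableOffset` helper name are not part of the patch.

// Editor's sketch: the overflow guard used before folding an offset into a
// 64-bit index, mirroring the checks kept in visitLoad above.
#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <optional>

// Returns the offset as int64_t, or std::nullopt if it cannot be used.
std::optional<int64_t> foldableOffset(const llvm::APInt &Offset) {
  if (Offset.getActiveBits() > 64) // would not fit in a signed 64-bit index
    return std::nullopt;
  int64_t V = Offset.getSExtValue();
  if (V < 0)                       // negative (out-of-bounds) offsets are ignored
    return std::nullopt;
  return V;
}

int main() {
  llvm::APInt Small(64, 16);       // fits, so the offset 16 is usable
  auto A = foldableOffset(Small);
  return (A && *A == 16) ? 0 : 1;
}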
@@ -186,10 +186,9 @@ bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) { if (SimplifiedRHS != SimplifiedAddresses.end()) { SimplifiedAddress &LHSAddr = SimplifiedLHS->second; SimplifiedAddress &RHSAddr = SimplifiedRHS->second; - if (LHSAddr.Base == RHSAddr.Base) { - LHS = LHSAddr.Offset; - RHS = RHSAddr.Offset; - } + if (LHSAddr.Base == RHSAddr.Base) + return ICmpInst::compare(LHSAddr.Offset, RHSAddr.Offset, + I.getPredicate()); } } } diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 8bb5efcf1b2ec..b59aa4810005b 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -288,6 +288,7 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, { PreservedAnalyses PA = PreservedAnalyses::all(); PA.abandon(); + PA.abandon(); PA.abandon(); FAM.invalidate(*Caller, PA); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 21a1c74eefc07..54dde8401cdff 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8247,9 +8247,8 @@ const SCEV *ScalarEvolution::getExitCount(const Loop *L, llvm_unreachable("Invalid ExitCountKind!"); } -const SCEV * -ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, - SmallVector &Preds) { +const SCEV *ScalarEvolution::getPredicatedBackedgeTakenCount( + const Loop *L, SmallVectorImpl &Preds) { return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); } @@ -8267,7 +8266,7 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, } const SCEV *ScalarEvolution::getPredicatedSymbolicMaxBackedgeTakenCount( - const Loop *L, SmallVector &Preds) { + const Loop *L, SmallVectorImpl &Preds) { return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L, this, &Preds); } @@ -8537,9 +8536,9 @@ void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) { /// is never skipped. This is a valid assumption as long as the loop exits via /// that test. For precise results, it is the caller's responsibility to specify /// the relevant loop exiting block using getExact(ExitingBlock, SE). -const SCEV * -ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, - SmallVector *Preds) const { +const SCEV *ScalarEvolution::BackedgeTakenInfo::getExact( + const Loop *L, ScalarEvolution *SE, + SmallVectorImpl *Preds) const { // If any exits were not computable, the loop is not computable. if (!isComplete() || ExitNotTaken.empty()) return SE->getCouldNotCompute(); @@ -8622,7 +8621,7 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { const SCEV *ScalarEvolution::BackedgeTakenInfo::getSymbolicMax( const Loop *L, ScalarEvolution *SE, - SmallVector *Predicates) { + SmallVectorImpl *Predicates) { if (!SymbolicMax) { // Form an expression for the maximum exit count possible for this loop. 
We // merge the max and exact information to approximate a version of diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index d4dbab04e8ecd..6767c7ca288bc 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2093,6 +2093,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::NoSanitizeBounds; case bitc::ATTR_KIND_NO_SANITIZE_COVERAGE: return Attribute::NoSanitizeCoverage; + case bitc::ATTR_KIND_NO_SANITIZE_REALTIME: + return Attribute::NoSanitizeRealtime; case bitc::ATTR_KIND_NULL_POINTER_IS_VALID: return Attribute::NullPointerIsValid; case bitc::ATTR_KIND_OPTIMIZE_FOR_DEBUGGING: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 2f3e90d6e3821..d21ff10d51d00 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -795,6 +795,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NO_SANITIZE_BOUNDS; case Attribute::NoSanitizeCoverage: return bitc::ATTR_KIND_NO_SANITIZE_COVERAGE; + case llvm::Attribute::NoSanitizeRealtime: + return bitc::ATTR_KIND_NO_SANITIZE_REALTIME; case Attribute::NullPointerIsValid: return bitc::ATTR_KIND_NULL_POINTER_IS_VALID; case Attribute::OptimizeForDebugging: diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index bccd9b04cd2c5..e40248197c7c7 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -402,8 +402,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Scan the register defs for this instruction and update // live-ranges. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; + for (const MachineOperand &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... 
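Editor's note: the AggressiveAntiDepBreaker hunk above, and several hunks that follow (MachineInstr.cpp, ModuloSchedule.cpp, RegAllocFast.cpp, RegisterCoalescer.cpp), replace hand-rolled `isReg() && isDef()` checks with iteration over MachineInstr::all_defs(). The standalone sketch below shows the same "pre-filtered view" idea using llvm::make_filter_range; `Operand` and `allDefs` are made-up stand-ins, and the real all_defs() is implemented differently.

// Editor's sketch: an all_defs()-style filtered view over operands, so loop
// bodies no longer need the manual isReg()/isDef() boilerplate.
#include "llvm/ADT/STLExtras.h"
#include <iostream>
#include <vector>

struct Operand {
  bool IsReg = false;
  bool IsDef = false;
  unsigned Reg = 0;
};

// Yields only register-def operands from the full operand list.
auto allDefs(std::vector<Operand> &Ops) {
  return llvm::make_filter_range(
      Ops, [](const Operand &MO) { return MO.IsReg && MO.IsDef; });
}

int main() {
  std::vector<Operand> Ops = {{true, true, 1}, {true, false, 2}, {false, false, 0}};
  for (Operand &MO : allDefs(Ops)) // mirrors: for (MachineOperand &MO : MI.all_defs())
    std::cout << "def of reg " << MO.Reg << "\n";
}

The filtered range also composes with other range adaptors, which is why a loop like `reverse(MI.all_defs())` in the RegAllocFast hunk below stays a one-liner.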
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 0f2acdb12389d..f21910ee3a444 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2125,19 +2125,15 @@ bool MachineInstr::addRegisterDead(Register Reg, } void MachineInstr::clearRegisterDeads(Register Reg) { - for (MachineOperand &MO : operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) - continue; - MO.setIsDead(false); - } + for (MachineOperand &MO : all_defs()) + if (MO.getReg() == Reg) + MO.setIsDead(false); } void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { - for (MachineOperand &MO : operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) - continue; - MO.setIsUndef(IsUndef); - } + for (MachineOperand &MO : all_defs()) + if (MO.getReg() == Reg && MO.getSubReg() != 0) + MO.setIsUndef(IsUndef); } void MachineInstr::addRegisterDefined(Register Reg, @@ -2147,9 +2143,8 @@ void MachineInstr::addRegisterDefined(Register Reg, if (MO) return; } else { - for (const MachineOperand &MO : operands()) { - if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && - MO.getSubReg() == 0) + for (const MachineOperand &MO : all_defs()) { + if (MO.getReg() == Reg && MO.getSubReg() == 0) return; } } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 78201d9bfb79a..99c82bc3a2660 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -2667,8 +2667,8 @@ void ModuloScheduleExpanderMVE::calcNumUnroll() { void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef) { - for (MachineOperand &MO : NewMI->operands()) { - if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef()) + for (MachineOperand &MO : NewMI->all_defs()) { + if (!MO.getReg().isVirtual()) continue; Register Reg = MO.getReg(); const TargetRegisterClass *RC = MRI.getRegClass(Reg); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 62f7ed29c8c81..6babd5a3f1f96 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1329,9 +1329,8 @@ void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) { // we assign these. SmallVector RegClassDefCounts(TRI->getNumRegClasses(), 0); - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg() && MO.isDef()) - addRegClassDefCounts(RegClassDefCounts, MO.getReg()); + for (const MachineOperand &MO : MI.all_defs()) + addRegClassDefCounts(RegClassDefCounts, MO.getReg()); llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) { const MachineOperand &MO0 = MI.getOperand(I0); @@ -1481,9 +1480,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // Assign virtual register defs. while (ReArrangedImplicitOps) { ReArrangedImplicitOps = false; - for (MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; + for (MachineOperand &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { ReArrangedImplicitOps = @@ -1499,10 +1496,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of ). 
- for (MachineOperand &MO : reverse(MI.operands())) { - if (!MO.isReg() || !MO.isDef()) - continue; - + for (MachineOperand &MO : reverse(MI.all_defs())) { Register Reg = MO.getReg(); // subreg defs don't free the full register. We left the subreg number diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index f6c53f3051c2f..97f8346df0e8f 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3230,8 +3230,8 @@ void JoinVals::pruneValues(JoinVals &Other, // Also remove dead flags since the joined live range will // continue past this instruction. for (MachineOperand &MO : - Indexes->getInstructionFromIndex(Def)->operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + Indexes->getInstructionFromIndex(Def)->all_defs()) { + if (MO.getReg() == Reg) { if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) MO.setIsUndef(false); MO.setIsDead(false); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 1a98fbd7589fb..b13a2df7b48eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -439,8 +439,8 @@ class SelectionDAGBuilder { /// The set of gc.relocate calls associated with this gc.statepoint. SmallVector GCRelocates; - /// The full list of gc arguments to the gc.statepoint being lowered. - ArrayRef GCArgs; + /// The full list of gc-live arguments to the gc.statepoint being lowered. + ArrayRef GCLives; /// The gc.statepoint instruction. const Instruction *StatepointInstr = nullptr; diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 4268da8670d50..a1f87d2c62573 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -673,7 +673,7 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, // it is the contents of the slot which may get updated, not the pointer to // the alloca SmallVector Allocas; - for (Value *V : SI.GCArgs) { + for (Value *V : SI.GCLives) { SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast(Incoming)) { // This handles allocas as arguments to the statepoint @@ -1086,7 +1086,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, } } - SI.GCArgs = ArrayRef(I.gc_args_begin(), I.gc_args_end()); + SI.GCLives = ArrayRef(I.gc_live_begin(), I.gc_live_end()); SI.StatepointInstr = &I; SI.ID = I.getID(); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index c2b8c39662bb6..38bd0b0ba4114 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -823,7 +823,9 @@ void TargetInstrInfo::lowerCopy(MachineInstr *MI, } copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(), - SrcMO.getReg(), SrcMO.isKill()); + SrcMO.getReg(), SrcMO.isKill(), + DstMO.getReg().isPhysical() ? DstMO.isRenamable() : false, + SrcMO.getReg().isPhysical() ? 
SrcMO.isRenamable() : false); if (MI->getNumOperands() > 2) transferImplicitOperands(MI, TRI); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 70a6e74b94d55..532313a31fc13 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1548,7 +1548,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *CommonExit = nullptr; SetVector Inputs, Outputs, SinkingCands, HoistingCands; Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); - Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands, + /*CollectGlobalInputs=*/true); + + Inputs.remove_if([&](Value *I) { + if (auto *GV = dyn_cast_if_present(I)) + return GV->getValueType() == OpenMPIRBuilder::Ident; + + return false; + }); LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h index b9441729b48c6..2f1c7b85e6650 100644 --- a/llvm/lib/IR/AttributeImpl.h +++ b/llvm/lib/IR/AttributeImpl.h @@ -275,7 +275,7 @@ class ConstantRangeListAttributeImpl final class AttributeBitSet { /// Bitset with a bit for each available attribute Attribute::AttrKind. - uint8_t AvailableAttrs[12] = {}; + uint8_t AvailableAttrs[16] = {}; static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT, "Too many attributes"); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ac754b5d9638d..5ff1f3dfb0dc9 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2223,6 +2223,12 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, "Attributes 'optdebug and optnone' are incompatible!", V); } + Check(!(Attrs.hasFnAttr(Attribute::SanitizeRealtime) && + Attrs.hasFnAttr(Attribute::NoSanitizeRealtime)), + "Attributes " + "'sanitize_realtime and nosanitize_realtime' are incompatible!", + V); + if (Attrs.hasFnAttr(Attribute::OptimizeForDebugging)) { Check(!Attrs.hasFnAttr(Attribute::OptimizeForSize), "Attributes 'optsize and optdebug' are incompatible!", V); diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index af09f2a0ee0cd..66e52fe2d08f8 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -5784,7 +5784,7 @@ bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) { : A[0][0].getString(); for (std::size_t I = 0, End = Values.size(); I != End; ++I) { MCAsmMacroArgument Arg; - Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1)); + Arg.emplace_back(AsmToken::Identifier, Values.substr(I, 1)); // Note that the AtPseudoVariable is enabled for instantiations of .irpc. // This is undocumented, but GAS seems to support it. 
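Editor's note on the AsmParser.cpp hunk above (and the MasmParser.cpp hunk that follows): `Values.slice(I, I + 1)` becomes `Values.substr(I, 1)`. Both produce the same one-character StringRef; slice takes a half-open (start, end) pair while substr takes (start, length). A small sketch of the equivalence, assuming LLVM's ADT headers are available:

// Editor's sketch: StringRef::slice(Start, End) vs. StringRef::substr(Start, N).
// Both calls below yield the same single-character StringRef.
#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  llvm::StringRef Values = "abc";
  for (size_t I = 0, End = Values.size(); I != End; ++I) {
    llvm::StringRef ByEnd = Values.slice(I, I + 1); // half-open [I, I+1)
    llvm::StringRef ByLen = Values.substr(I, 1);    // start I, length 1
    assert(ByEnd == ByLen);
  }
  return 0;
}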
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index f64b7f62d61d0..9f619c5018b50 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -7125,7 +7125,7 @@ bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) { StringRef Values(Argument); for (std::size_t I = 0, End = Values.size(); I != End; ++I) { MCAsmMacroArgument Arg; - Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1)); + Arg.emplace_back(AsmToken::Identifier, Values.substr(I, 1)); if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc())) return true; diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 3f6f605149b47..90d7588407068 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" @@ -48,6 +49,8 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A, return AddrDelta; } +uint64_t MCDecodedPseudoProbe::getGuid() const { return InlineTree->Guid; } + void MCPseudoProbe::emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const { bool IsSentinel = isSentinelProbe(getAttributes()); @@ -271,7 +274,7 @@ static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP, auto It = GUID2FuncMAP.find(GUID); assert(It != GUID2FuncMAP.end() && "Probe function must exist for a valid GUID"); - return It->second.FuncName; + return It->FuncName; } void MCPseudoProbeFuncDesc::print(raw_ostream &OS) { @@ -288,8 +291,8 @@ void MCDecodedPseudoProbe::getInlineContext( // Note that it won't include the probe's belonging function(leaf location) while (Cur->hasInlineSite()) { StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid); - ContextStack.emplace_back( - MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); + ContextStack.emplace_back(MCPseudoProbeFrameLocation( + FuncName, std::get<1>(Cur->getInlineSite()))); Cur = static_cast(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -317,10 +320,10 @@ void MCDecodedPseudoProbe::print(raw_ostream &OS, bool ShowName) const { OS << "FUNC: "; if (ShowName) { - StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Guid); + StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, getGuid()); OS << FuncName.str() << " "; } else { - OS << Guid << " "; + OS << getGuid() << " "; } OS << "Index: " << Index << " "; if (Discriminator) @@ -387,59 +390,68 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, Data = Start; End = Data + Size; + uint32_t FuncDescCount = 0; while (Data < End) { - auto ErrorOrGUID = readUnencodedNumber(); - if (!ErrorOrGUID) + // GUID + if (!readUnencodedNumber()) return false; - - auto ErrorOrHash = readUnencodedNumber(); - if (!ErrorOrHash) + // Hash + if (!readUnencodedNumber()) return false; auto ErrorOrNameSize = readUnsignedNumber(); if (!ErrorOrNameSize) return false; - uint32_t NameSize = std::move(*ErrorOrNameSize); - - auto ErrorOrName = readString(NameSize); - if (!ErrorOrName) + // Function name + if (!readString(*ErrorOrNameSize)) return false; + ++FuncDescCount; + } + assert(Data == End && "Have unprocessed data in pseudo_probe_desc section"); + GUID2FuncDescMap.reserve(FuncDescCount); - uint64_t GUID = std::move(*ErrorOrGUID); - 
uint64_t Hash = std::move(*ErrorOrHash); - StringRef Name = std::move(*ErrorOrName); + Data = Start; + End = Data + Size; + while (Data < End) { + uint64_t GUID = + cantFail(errorOrToExpected(readUnencodedNumber())); + uint64_t Hash = + cantFail(errorOrToExpected(readUnencodedNumber())); + uint32_t NameSize = + cantFail(errorOrToExpected(readUnsignedNumber())); + StringRef Name = cantFail(errorOrToExpected(readString(NameSize))); // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap - GUID2FuncDescMap.emplace(GUID, MCPseudoProbeFuncDesc(GUID, Hash, Name)); + GUID2FuncDescMap.emplace_back(GUID, Hash, Name.copy(FuncNameAllocator)); } assert(Data == End && "Have unprocessed data in pseudo_probe_desc section"); + assert(GUID2FuncDescMap.size() == FuncDescCount && + "Mismatching function description count pre- and post-parsing"); + llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) { + return LHS.FuncGUID < RHS.FuncGUID; + }); return true; } +template bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex) { // The pseudo_probe section encodes an inline forest and each tree has a // format defined in MCPseudoProbe.h uint32_t Index = 0; - bool IsTopLevelFunc = Cur == &DummyInlineRoot; if (IsTopLevelFunc) { // Use a sequential id for top level inliner. - Index = Cur->getChildren().size(); + Index = CurChildIndex; } else { // Read inline site for inlinees - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - Index = std::move(*ErrorOrIndex); + Index = cantFail(errorOrToExpected(readUnsignedNumber())); } // Read guid - auto ErrorOrCurGuid = readUnencodedNumber(); - if (!ErrorOrCurGuid) - return false; - uint64_t Guid = std::move(*ErrorOrCurGuid); + uint64_t Guid = cantFail(errorOrToExpected(readUnencodedNumber())); // Decide if top-level node should be disgarded. if (IsTopLevelFunc && !GuidFilter.empty() && !GuidFilter.count(Guid)) @@ -448,8 +460,9 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // If the incoming node is null, all its children nodes should be disgarded. if (Cur) { // Switch/add to a new tree node(inlinee) - Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); - Cur->Guid = Guid; + Cur->getChildren()[CurChildIndex] = + MCDecodedPseudoProbeInlineTree(InlineSite(Guid, Index), Cur); + Cur = &Cur->getChildren()[CurChildIndex]; if (IsTopLevelFunc && !EncodingIsAddrBased) { if (auto V = FuncStartAddrs.lookup(Guid)) LastAddr = V; @@ -457,41 +470,28 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( } // Read number of probes in the current node. 
- auto ErrorOrNodeCount = readUnsignedNumber(); - if (!ErrorOrNodeCount) - return false; - uint32_t NodeCount = std::move(*ErrorOrNodeCount); + uint32_t NodeCount = + cantFail(errorOrToExpected(readUnsignedNumber())); + uint32_t CurrentProbeCount = 0; // Read number of direct inlinees - auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); - if (!ErrorOrCurChildrenToProcess) - return false; + uint32_t ChildrenToProcess = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read all probes in this node for (std::size_t I = 0; I < NodeCount; I++) { // Read index - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - uint32_t Index = std::move(*ErrorOrIndex); + uint32_t Index = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read type | flag. - auto ErrorOrValue = readUnencodedNumber(); - if (!ErrorOrValue) - return false; - uint8_t Value = std::move(*ErrorOrValue); + uint8_t Value = cantFail(errorOrToExpected(readUnencodedNumber())); uint8_t Kind = Value & 0xf; uint8_t Attr = (Value & 0x70) >> 4; // Read address uint64_t Addr = 0; if (Value & 0x80) { - auto ErrorOrOffset = readSignedNumber(); - if (!ErrorOrOffset) - return false; - int64_t Offset = std::move(*ErrorOrOffset); + int64_t Offset = cantFail(errorOrToExpected(readSignedNumber())); Addr = LastAddr + Offset; } else { - auto ErrorOrAddr = readUnencodedNumber(); - if (!ErrorOrAddr) - return false; - Addr = std::move(*ErrorOrAddr); + Addr = cantFail(errorOrToExpected(readUnencodedNumber())); if (isSentinelProbe(Attr)) { // For sentinel probe, the addr field actually stores the GUID of the // split function. Convert it to the real address. @@ -508,85 +508,189 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( uint32_t Discriminator = 0; if (hasDiscriminator(Attr)) { - auto ErrorOrDiscriminator = readUnsignedNumber(); - if (!ErrorOrDiscriminator) - return false; - Discriminator = std::move(*ErrorOrDiscriminator); + Discriminator = + cantFail(errorOrToExpected(readUnsignedNumber())); } if (Cur && !isSentinelProbe(Attr)) { - // Populate Address2ProbesMap - auto &Probes = Address2ProbesMap[Addr]; - Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, - Discriminator, Cur); - Cur->addProbes(&Probes.back()); + PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr, + Discriminator, Cur); + ++CurrentProbeCount; } LastAddr = Addr; } - uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); + if (Cur) { + Cur->setProbes( + MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount)); + InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess); + Cur->getChildren() = + MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess); + } for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I); + } + return Cur; +} + +template +bool MCPseudoProbeDecoder::countRecords(bool &Discard, uint32_t &ProbeCount, + uint32_t &InlinedCount, + const Uint64Set &GuidFilter) { + if (!IsTopLevelFunc) + // Read inline site for inlinees + if (!readUnsignedNumber()) + return false; + + // Read guid + auto ErrorOrCurGuid = readUnencodedNumber(); + if (!ErrorOrCurGuid) + return false; + uint64_t Guid = std::move(*ErrorOrCurGuid); + + // Decide if top-level node should be disgarded. + if (IsTopLevelFunc) { + Discard = !GuidFilter.empty() && !GuidFilter.count(Guid); + if (!Discard) + // Allocate an entry for top-level function record. 
+ ++InlinedCount; + } + + // Read number of probes in the current node. + auto ErrorOrNodeCount = readUnsignedNumber(); + if (!ErrorOrNodeCount) + return false; + uint32_t NodeCount = std::move(*ErrorOrNodeCount); + uint32_t CurrentProbeCount = 0; + + // Read number of direct inlinees + auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); + if (!ErrorOrCurChildrenToProcess) + return false; + uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); + + // Read all probes in this node + for (std::size_t I = 0; I < NodeCount; I++) { + // Read index + if (!readUnsignedNumber()) + return false; + + // Read type | flag. + auto ErrorOrValue = readUnencodedNumber(); + if (!ErrorOrValue) + return false; + uint8_t Value = std::move(*ErrorOrValue); + + uint8_t Attr = (Value & 0x70) >> 4; + if (Value & 0x80) { + // Offset + if (!readSignedNumber()) + return false; + } else { + // Addr + if (!readUnencodedNumber()) + return false; + } + + if (hasDiscriminator(Attr)) + // Discriminator + if (!readUnsignedNumber()) + return false; + + if (!Discard && !isSentinelProbe(Attr)) + ++CurrentProbeCount; + } + + if (!Discard) { + ProbeCount += CurrentProbeCount; + InlinedCount += ChildrenToProcess; } + for (uint32_t I = 0; I < ChildrenToProcess; I++) + if (!countRecords(Discard, ProbeCount, InlinedCount, GuidFilter)) + return false; return true; } bool MCPseudoProbeDecoder::buildAddress2ProbeMap( const uint8_t *Start, std::size_t Size, const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + // For function records in the order of their appearance in the encoded data + // (DFS), count the number of contained probes and inlined function records. + uint32_t ProbeCount = 0; + uint32_t InlinedCount = 0; + uint32_t TopLevelFuncs = 0; + Data = Start; + End = Data + Size; + bool Discard = false; + while (Data < End) { + if (!countRecords(Discard, ProbeCount, InlinedCount, GuidFilter)) + return false; + TopLevelFuncs += !Discard; + } + assert(Data == End && "Have unprocessed data in pseudo_probe section"); + PseudoProbeVec.reserve(ProbeCount); + InlineTreeVec.reserve(InlinedCount); + + // Allocate top-level function records as children of DummyInlineRoot. 
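Editor's note on the MCPseudoProbeDecoder rework above: the pseudo_probe section is now parsed twice, a countRecords pass that sizes PseudoProbeVec and InlineTreeVec, then a second pass that fills the pre-reserved storage. The sketch below shows that count-then-reserve-then-fill pattern on an invented length-prefixed record format, not the real probe encoding.

// Editor's sketch of the two-pass decode: pass 1 only counts records so the
// vector can be reserved up front; pass 2 builds the records in place, so
// references into the vector stay stable. The blob format here is made up.
#include <cstddef>
#include <cstdint>
#include <vector>

struct Record { const uint8_t *Payload; uint8_t Size; };

static size_t countRecords(const uint8_t *Data, const uint8_t *End) {
  size_t N = 0;
  while (Data < End) {
    uint8_t Len = *Data++; // 1-byte length prefix
    Data += Len;
    ++N;
  }
  return N;
}

std::vector<Record> decode(const uint8_t *Data, size_t Size) {
  const uint8_t *End = Data + Size;
  std::vector<Record> Records;
  Records.reserve(countRecords(Data, End)); // pass 1: size the storage
  while (Data < End) {                      // pass 2: fill without reallocating
    uint8_t Len = *Data++;
    Records.push_back({Data, Len});
    Data += Len;
  }
  return Records;
}

int main() {
  const uint8_t Blob[] = {2, 'h', 'i', 1, '!'};
  return decode(Blob, sizeof(Blob)).size() == 2 ? 0 : 1;
}

Reserving up front matters here because the new decoder hands out MutableArrayRef slices into these vectors; a reallocation would invalidate them.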
+ InlineTreeVec.resize(TopLevelFuncs); + DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec); + Data = Start; End = Data + Size; uint64_t LastAddr = 0; + uint32_t CurChildIndex = 0; while (Data < End) - buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, - FuncStartAddrs); + CurChildIndex += buildAddress2ProbeMap( + &DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex); assert(Data == End && "Have unprocessed data in pseudo_probe section"); + assert(PseudoProbeVec.size() == ProbeCount && + "Mismatching probe count pre- and post-parsing"); + assert(InlineTreeVec.size() == InlinedCount && + "Mismatching function records count pre- and post-parsing"); + + std::vector> SortedA2P(ProbeCount); + for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec)) + SortedA2P[I] = {Probe.getAddress(), I}; + llvm::sort(SortedA2P); + Address2ProbesMap.reserve(ProbeCount); + for (const uint32_t I : llvm::make_second_range(SortedA2P)) + Address2ProbesMap.emplace_back(PseudoProbeVec[I]); + SortedA2P.clear(); return true; } void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { OS << "Pseudo Probe Desc:\n"; - // Make the output deterministic - std::map OrderedMap(GUID2FuncDescMap.begin(), - GUID2FuncDescMap.end()); - for (auto &I : OrderedMap) { - I.second.print(OS); - } + for (auto &I : GUID2FuncDescMap) + I.print(OS); } void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS, uint64_t Address) { - auto It = Address2ProbesMap.find(Address); - if (It != Address2ProbesMap.end()) { - for (const MCDecodedPseudoProbe &Probe : It->second) { - OS << " [Probe]:\t"; - Probe.print(OS, GUID2FuncDescMap, true); - } + for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) { + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); } } void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) { - auto Entries = make_first_range(Address2ProbesMap); - SmallVector Addresses(Entries.begin(), Entries.end()); - llvm::sort(Addresses); - for (auto K : Addresses) { - OS << "Address:\t"; - OS << K; - OS << "\n"; - printProbeForAddress(OS, K); + uint64_t PrevAddress = INT64_MAX; + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + uint64_t Address = Probe.getAddress(); + if (Address != PrevAddress) { + PrevAddress = Address; + OS << "Address:\t" << Address << '\n'; + } + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); } } const MCDecodedPseudoProbe * MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const { - auto It = Address2ProbesMap.find(Address); - if (It == Address2ProbesMap.end()) - return nullptr; - const auto &Probes = It->second; - const MCDecodedPseudoProbe *CallProbe = nullptr; - for (const MCDecodedPseudoProbe &Probe : Probes) { + for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) { if (Probe.isCall()) { // Disabling the assert and returning first call probe seen so far. // Subsequent call probes, if any, are ignored. 
Due to the the way @@ -611,7 +715,7 @@ const MCPseudoProbeFuncDesc * MCPseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const { auto It = GUID2FuncDescMap.find(GUID); assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist"); - return &It->second; + return &*It; } void MCPseudoProbeDecoder::getInlineContextForProbe( diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 32b20d758ee70..c4d88856abdfb 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -799,7 +799,7 @@ InstrBuilder::createInstruction(const MCInst &MCI, unsigned WriteIndex = 0; Idx = 0U; for (const WriteDescriptor &WD : D.Writes) { - RegID = WD.isImplicitWrite() ? WD.RegisterID + RegID = WD.isImplicitWrite() ? MCRegister(WD.RegisterID) : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg or a write // to a constant register. diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index ff7129ba178cf..5fdf3baf8c02c 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -2369,7 +2369,7 @@ ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) { Expected Contents = S.getContents(); if (!Contents) return Contents.takeError(); - return Contents->slice(Offset, Offset + Entry.DataSize); + return Contents->substr(Offset, Entry.DataSize); } } return createStringError(object_error::parse_failed, diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 6f3dd4d8b5180..8fa3f67ea00f3 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -2099,7 +2099,7 @@ ArrayRef getSegmentContents(const MachOObjectFile &Obj, } auto &Segment = SegmentOrErr.get(); return arrayRefFromStringRef( - Obj.getData().slice(Segment.fileoff, Segment.fileoff + Segment.filesize)); + Obj.getData().substr(Segment.fileoff, Segment.filesize)); } } // namespace @@ -2454,9 +2454,8 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, Idx = 0; else Idx = b+1; - F = Name.slice(Idx, Idx + Foo.size()); - DotFramework = Name.slice(Idx + Foo.size(), - Idx + Foo.size() + sizeof(".framework/")-1); + F = Name.substr(Idx, Foo.size()); + DotFramework = Name.substr(Idx + Foo.size(), sizeof(".framework/") - 1); if (F == Foo && DotFramework == ".framework/") { isFramework = true; return Foo; @@ -2476,9 +2475,8 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, Idx = 0; else Idx = d+1; - F = Name.slice(Idx, Idx + Foo.size()); - DotFramework = Name.slice(Idx + Foo.size(), - Idx + Foo.size() + sizeof(".framework/")-1); + F = Name.substr(Idx, Foo.size()); + DotFramework = Name.substr(Idx + Foo.size(), sizeof(".framework/") - 1); if (F == Foo && DotFramework == ".framework/") { isFramework = true; return Foo; @@ -2495,7 +2493,7 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, // First pull off the version letter for the form Foo.A.dylib if any. if (a >= 3) { - Dot = Name.slice(a-2, a-1); + Dot = Name.substr(a - 2, 1); if (Dot == ".") a = a - 2; } @@ -2520,7 +2518,7 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, // There are incorrect library names of the form: // libATS.A_profile.dylib so check for these. 
if (Lib.size() >= 3) { - Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + Dot = Lib.substr(Lib.size() - 2, 1); if (Dot == ".") Lib = Lib.slice(0, Lib.size()-2); } @@ -2537,7 +2535,7 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, Lib = Name.slice(b+1, a); // There are library names of the form: QT.A.qtx so check for these. if (Lib.size() >= 3) { - Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + Dot = Lib.substr(Lib.size() - 2, 1); if (Dot == ".") Lib = Lib.slice(0, Lib.size()-2); } diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index f92e9d3812513..35b642ac7f3a2 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -575,6 +575,45 @@ void Instruction::dumpOS(raw_ostream &OS) const { } #endif // NDEBUG +FreezeInst *FreezeInst::create(Value *V, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + const Twine &Name) { + auto &Builder = Ctx.getLLVMIRBuilder(); + if (WhereIt != WhereBB->end()) + Builder.SetInsertPoint((*WhereIt).getTopmostLLVMInstruction()); + else + Builder.SetInsertPoint(cast(WhereBB->Val)); + auto *LLVMI = cast(Builder.CreateFreeze(V->Val, Name)); + return Ctx.createFreezeInst(LLVMI); +} + +FenceInst *FenceInst::create(AtomicOrdering Ordering, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + SyncScope::ID SSID) { + auto &Builder = Ctx.getLLVMIRBuilder(); + if (WhereIt != WhereBB->end()) + Builder.SetInsertPoint((*WhereIt).getTopmostLLVMInstruction()); + else + Builder.SetInsertPoint(cast(WhereBB->Val)); + llvm::FenceInst *LLVMI = Builder.CreateFence(Ordering, SSID); + return Ctx.createFenceInst(LLVMI); +} + +void FenceInst::setOrdering(AtomicOrdering Ordering) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&FenceInst::getOrdering, &FenceInst::setOrdering>>( + this); + cast(Val)->setOrdering(Ordering); +} + +void FenceInst::setSyncScopeID(SyncScope::ID SSID) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setSyncScopeID(SSID); +} + Value *SelectInst::createCommon(Value *Cond, Value *True, Value *False, const Twine &Name, IRBuilder<> &Builder, Context &Ctx) { @@ -945,8 +984,8 @@ void InvokeInst::setUnwindDest(BasicBlock *BB) { setOperand(2, BB); assert(getUnwindDest() == BB && "LLVM IR uses a different operan index!"); } -Instruction *InvokeInst::getLandingPadInst() const { - return cast( +LandingPadInst *InvokeInst::getLandingPadInst() const { + return cast( Ctx.getValue(cast(Val)->getLandingPadInst())); ; } @@ -1043,6 +1082,31 @@ BasicBlock *CallBrInst::getSuccessor(unsigned Idx) const { Ctx.getValue(cast(Val)->getSuccessor(Idx))); } +LandingPadInst *LandingPadInst::create(Type *RetTy, unsigned NumReservedClauses, + BBIterator WhereIt, BasicBlock *WhereBB, + Context &Ctx, const Twine &Name) { + auto &Builder = Ctx.getLLVMIRBuilder(); + if (WhereIt != WhereBB->end()) + Builder.SetInsertPoint((*WhereIt).getTopmostLLVMInstruction()); + else + Builder.SetInsertPoint(cast(WhereBB->Val)); + llvm::LandingPadInst *LLVMI = + Builder.CreateLandingPad(RetTy, NumReservedClauses, Name); + return Ctx.createLandingPadInst(LLVMI); +} + +void LandingPadInst::setCleanup(bool V) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setCleanup(V); +} + +Constant *LandingPadInst::getClause(unsigned Idx) const { + return cast( + Ctx.getValue(cast(Val)->getClause(Idx))); +} + Value *FuncletPadInst::getParentPad() const { return Ctx.getValue(cast(Val)->getParentPad()); } @@ -1666,6 +1730,14 @@ Value *BinaryOperator::createWithCopiedFlags(Instruction::Opcode Op, 
Value *LHS, InsertAtEnd, Ctx, Name); } +void PossiblyDisjointInst::setIsDisjoint(bool B) { + Ctx.getTracker() + .emplaceIfTracking>( + this); + cast(Val)->setIsDisjoint(B); +} + void AtomicRMWInst::setAlignment(Align Align) { Ctx.getTracker() .emplaceIfTrackinggetSubclassID() == ClassID::Cast; } +void PossiblyNonNegInst::setNonNeg(bool B) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setNonNeg(B); +} + Value *InsertElementInst::create(Value *Vec, Value *NewElt, Value *Idx, Instruction *InsertBefore, Context &Ctx, const Twine &Name) { @@ -2157,6 +2236,16 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { assert(isa(LLVMV) && "Expected Instruction"); switch (cast(LLVMV)->getOpcode()) { + case llvm::Instruction::Freeze: { + auto *LLVMFreeze = cast(LLVMV); + It->second = std::unique_ptr(new FreezeInst(LLVMFreeze, *this)); + return It->second.get(); + } + case llvm::Instruction::Fence: { + auto *LLVMFence = cast(LLVMV); + It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); + return It->second.get(); + } case llvm::Instruction::Select: { auto *LLVMSel = cast(LLVMV); It->second = std::unique_ptr(new SelectInst(LLVMSel, *this)); @@ -2215,6 +2304,12 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr(new CallBrInst(LLVMCallBr, *this)); return It->second.get(); } + case llvm::Instruction::LandingPad: { + auto *LLVMLPad = cast(LLVMV); + It->second = + std::unique_ptr(new LandingPadInst(LLVMLPad, *this)); + return It->second.get(); + } case llvm::Instruction::CatchPad: { auto *LLVMCPI = cast(LLVMV); It->second = @@ -2349,6 +2444,16 @@ BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) { return BB; } +FreezeInst *Context::createFreezeInst(llvm::FreezeInst *SI) { + auto NewPtr = std::unique_ptr(new FreezeInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +FenceInst *Context::createFenceInst(llvm::FenceInst *SI) { + auto NewPtr = std::unique_ptr(new FenceInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + SelectInst *Context::createSelectInst(llvm::SelectInst *SI) { auto NewPtr = std::unique_ptr(new SelectInst(SI, *this)); return cast(registerValue(std::move(NewPtr))); @@ -2415,6 +2520,10 @@ UnreachableInst *Context::createUnreachableInst(llvm::UnreachableInst *UI) { std::unique_ptr(new UnreachableInst(UI, *this)); return cast(registerValue(std::move(NewPtr))); } +LandingPadInst *Context::createLandingPadInst(llvm::LandingPadInst *I) { + auto NewPtr = std::unique_ptr(new LandingPadInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} CatchPadInst *Context::createCatchPadInst(llvm::CatchPadInst *I) { auto NewPtr = std::unique_ptr(new CatchPadInst(I, *this)); return cast(registerValue(std::move(NewPtr))); diff --git a/llvm/lib/TableGen/DetailedRecordsBackend.cpp b/llvm/lib/TableGen/DetailedRecordsBackend.cpp index 500aa4c78225d..45e621483c817 100644 --- a/llvm/lib/TableGen/DetailedRecordsBackend.cpp +++ b/llvm/lib/TableGen/DetailedRecordsBackend.cpp @@ -21,36 +21,30 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" -#include -#include #include #include -#define DEBUG_TYPE "detailed-records-backend" - -#define NL "\n" - using namespace llvm; namespace { class DetailedRecordsEmitter { private: - RecordKeeper &Records; + const RecordKeeper &Records; public: - DetailedRecordsEmitter(RecordKeeper &RK) : Records(RK) {} + explicit DetailedRecordsEmitter(const RecordKeeper 
&RK) : Records(RK) {} void run(raw_ostream &OS); void printReportHeading(raw_ostream &OS); + void printSectionHeading(StringRef Title, int Count, raw_ostream &OS); void printVariables(raw_ostream &OS); void printClasses(raw_ostream &OS); void printRecords(raw_ostream &OS); - void printSectionHeading(StringRef Title, int Count, raw_ostream &OS); - void printDefms(Record *Rec, raw_ostream &OS); - void printTemplateArgs(Record *Rec, raw_ostream &OS); - void printSuperclasses(Record *Rec, raw_ostream &OS); - void printFields(Record *Rec, raw_ostream &OS); + void printDefms(const Record &Rec, raw_ostream &OS); + void printTemplateArgs(const Record &Rec, raw_ostream &OS); + void printSuperclasses(const Record &Rec, raw_ostream &OS); + void printFields(const Record &Rec, raw_ostream &OS); }; // emitter class } // anonymous namespace @@ -68,15 +62,21 @@ void DetailedRecordsEmitter::printReportHeading(raw_ostream &OS) { OS << formatv("DETAILED RECORDS for file {0}\n", Records.getInputFilename()); } +// Print a section heading with the name of the section and +// the item count. +void DetailedRecordsEmitter::printSectionHeading(StringRef Title, int Count, + raw_ostream &OS) { + OS << formatv("\n{0} {1} ({2}) {0}\n", "--------------------", Title, Count); +} + // Print the global variables. void DetailedRecordsEmitter::printVariables(raw_ostream &OS) { const auto GlobalList = Records.getGlobals(); printSectionHeading("Global Variables", GlobalList.size(), OS); - OS << NL; - for (const auto &Var : GlobalList) { - OS << Var.first << " = " << Var.second->getAsString() << NL; - } + OS << '\n'; + for (const auto &Var : GlobalList) + OS << Var.first << " = " << Var.second->getAsString() << '\n'; } // Print the classes, including the template arguments, superclasses, @@ -85,13 +85,12 @@ void DetailedRecordsEmitter::printClasses(raw_ostream &OS) { const auto &ClassList = Records.getClasses(); printSectionHeading("Classes", ClassList.size(), OS); - for (const auto &ClassPair : ClassList) { - auto *const Class = ClassPair.second.get(); + for (const auto &[Name, Class] : ClassList) { OS << formatv("\n{0} |{1}|\n", Class->getNameInitAsString(), SrcMgr.getFormattedLocationNoOffset(Class->getLoc().front())); - printTemplateArgs(Class, OS); - printSuperclasses(Class, OS); - printFields(Class, OS); + printTemplateArgs(*Class, OS); + printSuperclasses(*Class, OS); + printFields(*Class, OS); } } @@ -101,42 +100,33 @@ void DetailedRecordsEmitter::printRecords(raw_ostream &OS) { const auto &RecordList = Records.getDefs(); printSectionHeading("Records", RecordList.size(), OS); - for (const auto &RecPair : RecordList) { - auto *const Rec = RecPair.second.get(); + for (const auto &[DefName, Rec] : RecordList) { std::string Name = Rec->getNameInitAsString(); OS << formatv("\n{0} |{1}|\n", Name.empty() ? "\"\"" : Name, SrcMgr.getFormattedLocationNoOffset(Rec->getLoc().front())); - printDefms(Rec, OS); - printSuperclasses(Rec, OS); - printFields(Rec, OS); + printDefms(*Rec, OS); + printSuperclasses(*Rec, OS); + printFields(*Rec, OS); } } -// Print a section heading with the name of the section and -// the item count. -void DetailedRecordsEmitter::printSectionHeading(StringRef Title, int Count, - raw_ostream &OS) { - OS << formatv("\n{0} {1} ({2}) {0}\n", "--------------------", Title, Count); -} - // Print the record's defm source locations, if any. Note that they // are stored in the reverse order of their invocation. 
-void DetailedRecordsEmitter::printDefms(Record *Rec, raw_ostream &OS) { - const auto &LocList = Rec->getLoc(); +void DetailedRecordsEmitter::printDefms(const Record &Rec, raw_ostream &OS) { + const auto &LocList = Rec.getLoc(); if (LocList.size() < 2) return; OS << " Defm sequence:"; - for (unsigned I = LocList.size() - 1; I >= 1; --I) { - OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(LocList[I])); - } - OS << NL; + for (const SMLoc Loc : reverse(LocList)) + OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(Loc)); + OS << '\n'; } // Print the template arguments of a class. -void DetailedRecordsEmitter::printTemplateArgs(Record *Rec, +void DetailedRecordsEmitter::printTemplateArgs(const Record &Rec, raw_ostream &OS) { - ArrayRef Args = Rec->getTemplateArgs(); + ArrayRef Args = Rec.getTemplateArgs(); if (Args.empty()) { OS << " Template args: (none)\n"; return; @@ -144,38 +134,38 @@ void DetailedRecordsEmitter::printTemplateArgs(Record *Rec, OS << " Template args:\n"; for (const Init *ArgName : Args) { - const RecordVal *Value = Rec->getValue(ArgName); + const RecordVal *Value = Rec.getValue(ArgName); assert(Value && "Template argument value not found."); OS << " "; Value->print(OS, false); - OS << formatv(" |{0}|", SrcMgr.getFormattedLocationNoOffset(Value->getLoc())); - OS << NL; + OS << formatv(" |{0}|\n", + SrcMgr.getFormattedLocationNoOffset(Value->getLoc())); } } // Print the superclasses of a class or record. Indirect superclasses // are enclosed in parentheses. -void DetailedRecordsEmitter::printSuperclasses(Record *Rec, raw_ostream &OS) { - ArrayRef> Superclasses = Rec->getSuperClasses(); +void DetailedRecordsEmitter::printSuperclasses(const Record &Rec, + raw_ostream &OS) { + ArrayRef> Superclasses = Rec.getSuperClasses(); if (Superclasses.empty()) { OS << " Superclasses: (none)\n"; return; } OS << " Superclasses:"; - for (const auto &SuperclassPair : Superclasses) { - auto *ClassRec = SuperclassPair.first; - if (Rec->hasDirectSuperClass(ClassRec)) + for (const auto &[ClassRec, Loc] : Superclasses) { + if (Rec.hasDirectSuperClass(ClassRec)) OS << formatv(" {0}", ClassRec->getNameInitAsString()); else OS << formatv(" ({0})", ClassRec->getNameInitAsString()); } - OS << NL; + OS << '\n'; } // Print the fields of a class or record, including their source locations. -void DetailedRecordsEmitter::printFields(Record *Rec, raw_ostream &OS) { - const auto &ValueList = Rec->getValues(); +void DetailedRecordsEmitter::printFields(const Record &Rec, raw_ostream &OS) { + const auto &ValueList = Rec.getValues(); if (ValueList.empty()) { OS << " Fields: (none)\n"; return; @@ -183,7 +173,7 @@ void DetailedRecordsEmitter::printFields(Record *Rec, raw_ostream &OS) { OS << " Fields:\n"; for (const RecordVal &Value : ValueList) - if (!Rec->isTemplateArg(Value.getNameInit())) { + if (!Rec.isTemplateArg(Value.getNameInit())) { OS << " "; Value.print(OS, false); OS << formatv(" |{0}|\n", @@ -191,13 +181,8 @@ void DetailedRecordsEmitter::printFields(Record *Rec, raw_ostream &OS) { } } -namespace llvm { - // This function is called by TableGen after parsing the files. - -void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS) { +void llvm::EmitDetailedRecords(const RecordKeeper &RK, raw_ostream &OS) { // Instantiate the emitter class and invoke run(). 
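Editor's note on the DetailedRecordsBackend.cpp hunks above: map iteration switches to structured bindings (`for (const auto &[Name, Class] : ClassList)`), and printDefms walks the location list with `reverse(LocList)` instead of a manual descending index loop. A small sketch of both idioms, with placeholder map and list contents:

// Editor's sketch: structured bindings over a map, and llvm::reverse()
// in place of a hand-written backwards index loop.
#include "llvm/ADT/STLExtras.h"
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, int> ClassList = {{"Foo", 1}, {"Bar", 2}};
  for (const auto &[Name, Count] : ClassList) // no more Pair.first / Pair.second
    std::cout << Name << " -> " << Count << "\n";

  std::vector<std::string> LocList = {"first.td:1", "second.td:2", "third.td:3"};
  for (const std::string &Loc : llvm::reverse(LocList)) // newest invocation first
    std::cout << " |" << Loc << "|";
  std::cout << "\n";
}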
DetailedRecordsEmitter(RK).run(OS); } - -} // namespace llvm diff --git a/llvm/lib/TableGen/JSONBackend.cpp b/llvm/lib/TableGen/JSONBackend.cpp index cd10c22094e45..d648019ac46e8 100644 --- a/llvm/lib/TableGen/JSONBackend.cpp +++ b/llvm/lib/TableGen/JSONBackend.cpp @@ -26,43 +26,41 @@ namespace { class JSONEmitter { private: - RecordKeeper &Records; + const RecordKeeper &Records; json::Value translateInit(const Init &I); public: - JSONEmitter(RecordKeeper &R); + explicit JSONEmitter(const RecordKeeper &R) : Records(R) {} void run(raw_ostream &OS); }; } // end anonymous namespace -JSONEmitter::JSONEmitter(RecordKeeper &R) : Records(R) {} - json::Value JSONEmitter::translateInit(const Init &I) { - // Init subclasses that we return as JSON primitive values of one // kind or another. - if (isa(&I)) { + if (isa(&I)) return nullptr; - } else if (auto *Bit = dyn_cast(&I)) { + if (const auto *Bit = dyn_cast(&I)) return Bit->getValue() ? 1 : 0; - } else if (auto *Bits = dyn_cast(&I)) { - json::Array array; - for (unsigned i = 0, limit = Bits->getNumBits(); i < limit; i++) - array.push_back(translateInit(*Bits->getBit(i))); - return std::move(array); - } else if (auto *Int = dyn_cast(&I)) { + if (const auto *Bits = dyn_cast(&I)) { + json::Array Array; + for (unsigned Idx = 0, E = Bits->getNumBits(); Idx < E; ++Idx) + Array.push_back(translateInit(*Bits->getBit(Idx))); + return std::move(Array); + } + if (const auto *Int = dyn_cast(&I)) return Int->getValue(); - } else if (auto *Str = dyn_cast(&I)) { + if (const auto *Str = dyn_cast(&I)) return Str->getValue(); - } else if (auto *List = dyn_cast(&I)) { - json::Array array; - for (auto *val : *List) - array.push_back(translateInit(*val)); - return std::move(array); + if (const auto *List = dyn_cast(&I)) { + json::Array Array; + for (const auto *Val : *List) + Array.push_back(translateInit(*Val)); + return std::move(Array); } // Init subclasses that we return as JSON objects containing a @@ -70,56 +68,58 @@ json::Value JSONEmitter::translateInit(const Init &I) { // translation back into TableGen input syntax that -print-records // would give. 
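For the JSON backend, translateInit() either maps an Init directly to a JSON primitive (null, 0/1, integer, string, array) or wraps it in a small object carrying a "kind" discriminator plus the "printable" fallback. A minimal sketch of building one such object with llvm::json (assumes LLVM's Support library; the values are illustrative):

#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Mirrors the VarBitInit case: an object with kind/var/index fields.
static json::Value makeVarBit(StringRef VarName, unsigned BitNum) {
  json::Object Obj;
  Obj["kind"] = "varbit";
  Obj["var"] = VarName;
  Obj["index"] = BitNum;
  return std::move(Obj); // json::Value takes Object by rvalue, so move it.
}

int main() {
  // Key order in the printed form is chosen by the serializer.
  outs() << makeVarBit("Inst", 31) << "\n";
}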
- json::Object obj; - obj["printable"] = I.getAsString(); - - if (auto *Def = dyn_cast(&I)) { - obj["kind"] = "def"; - obj["def"] = Def->getDef()->getName(); - return std::move(obj); - } else if (auto *Var = dyn_cast(&I)) { - obj["kind"] = "var"; - obj["var"] = Var->getName(); - return std::move(obj); - } else if (auto *VarBit = dyn_cast(&I)) { - if (auto *Var = dyn_cast(VarBit->getBitVar())) { - obj["kind"] = "varbit"; - obj["var"] = Var->getName(); - obj["index"] = VarBit->getBitNum(); - return std::move(obj); + json::Object Obj; + Obj["printable"] = I.getAsString(); + + if (const auto *Def = dyn_cast(&I)) { + Obj["kind"] = "def"; + Obj["def"] = Def->getDef()->getName(); + return std::move(Obj); + } + if (const auto *Var = dyn_cast(&I)) { + Obj["kind"] = "var"; + Obj["var"] = Var->getName(); + return std::move(Obj); + } + if (const auto *VarBit = dyn_cast(&I)) { + if (const auto *Var = dyn_cast(VarBit->getBitVar())) { + Obj["kind"] = "varbit"; + Obj["var"] = Var->getName(); + Obj["index"] = VarBit->getBitNum(); + return std::move(Obj); } - } else if (auto *Dag = dyn_cast(&I)) { - obj["kind"] = "dag"; - obj["operator"] = translateInit(*Dag->getOperator()); + } + if (const auto *Dag = dyn_cast(&I)) { + Obj["kind"] = "dag"; + Obj["operator"] = translateInit(*Dag->getOperator()); if (auto name = Dag->getName()) - obj["name"] = name->getAsUnquotedString(); - json::Array args; - for (unsigned i = 0, limit = Dag->getNumArgs(); i < limit; ++i) { - json::Array arg; - arg.push_back(translateInit(*Dag->getArg(i))); - if (auto argname = Dag->getArgName(i)) - arg.push_back(argname->getAsUnquotedString()); + Obj["name"] = name->getAsUnquotedString(); + json::Array Args; + for (unsigned Idx = 0, E = Dag->getNumArgs(); Idx < E; ++Idx) { + json::Array Arg; + Arg.push_back(translateInit(*Dag->getArg(Idx))); + if (const auto ArgName = Dag->getArgName(Idx)) + Arg.push_back(ArgName->getAsUnquotedString()); else - arg.push_back(nullptr); - args.push_back(std::move(arg)); + Arg.push_back(nullptr); + Args.push_back(std::move(Arg)); } - obj["args"] = std::move(args); - return std::move(obj); + Obj["args"] = std::move(Args); + return std::move(Obj); } // Final fallback: anything that gets past here is simply given a // kind field of 'complex', and the only other field is the standard // 'printable' representation. - assert(!I.isConcrete()); - obj["kind"] = "complex"; - return std::move(obj); + Obj["kind"] = "complex"; + return std::move(Obj); } void JSONEmitter::run(raw_ostream &OS) { - json::Object root; + json::Object Root; - root["!tablegen_json_version"] = 1; + Root["!tablegen_json_version"] = 1; // Prepare the arrays that will list the instances of every class. // We mostly fill those in by iterating over the superclasses of @@ -127,64 +127,59 @@ void JSONEmitter::run(raw_ostream &OS) { // class with no instances at all, so we do a preliminary iteration // over the classes, invoking std::map::operator[] to default- // construct the array for each one. - std::map instance_lists; - for (const auto &C : Records.getClasses()) { - const auto Name = C.second->getNameInitAsString(); - (void)instance_lists[Name]; - } + std::map InstanceLists; + for (const auto &[ClassName, ClassRec] : Records.getClasses()) + InstanceLists.emplace(ClassRec->getNameInitAsString(), json::Array()); // Main iteration over the defs. 
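The preliminary loop over the classes exists so that a class with zero defs still gets an (empty) entry in "!instanceof"; the comment's mention of std::map::operator[] is now a slight anachronism since the new code uses emplace, but the effect is the same because emplace never overwrites an existing key. A small stand-alone illustration (class and def names are made up):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, std::vector<std::string>> InstanceLists;
  // Pre-seed: every class gets an entry, even if nothing derives from it.
  for (const char *Class : {"Instruction", "Intrinsic", "UnusedClass"})
    InstanceLists.emplace(Class, std::vector<std::string>());

  InstanceLists["Instruction"].push_back("ADDri"); // from the def iteration

  for (const auto &[Name, Defs] : InstanceLists)
    std::printf("%s: %zu defs\n", Name.c_str(), Defs.size());
  // UnusedClass still prints, with 0 defs.
}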
- for (const auto &D : Records.getDefs()) { - const auto Name = D.second->getNameInitAsString(); - auto &Def = *D.second; + for (const auto &[DefName, Def] : Records.getDefs()) { + const std::string Name = Def->getNameInitAsString(); - json::Object obj; - json::Array fields; + json::Object Obj; + json::Array Fields; - for (const RecordVal &RV : Def.getValues()) { - if (!Def.isTemplateArg(RV.getNameInit())) { + for (const RecordVal &RV : Def->getValues()) { + if (!Def->isTemplateArg(RV.getNameInit())) { auto Name = RV.getNameInitAsString(); if (RV.isNonconcreteOK()) - fields.push_back(Name); - obj[Name] = translateInit(*RV.getValue()); + Fields.push_back(Name); + Obj[Name] = translateInit(*RV.getValue()); } } - obj["!fields"] = std::move(fields); + Obj["!fields"] = std::move(Fields); - json::Array superclasses; - for (const auto &SuperPair : Def.getSuperClasses()) - superclasses.push_back(SuperPair.first->getNameInitAsString()); - obj["!superclasses"] = std::move(superclasses); + json::Array SuperClasses; + // Add this def to the instance list for each of its superclasses. + for (const auto &[SuperClass, Loc] : Def->getSuperClasses()) { + std::string SuperName = SuperClass->getNameInitAsString(); + SuperClasses.push_back(SuperName); + InstanceLists[SuperName].push_back(Name); + } - obj["!name"] = Name; - obj["!anonymous"] = Def.isAnonymous(); + Obj["!superclasses"] = std::move(SuperClasses); - json::Array locs; - for (const SMLoc Loc : Def.getLoc()) - locs.push_back(SrcMgr.getFormattedLocationNoOffset(Loc)); - obj["!locs"] = std::move(locs); + Obj["!name"] = Name; + Obj["!anonymous"] = Def->isAnonymous(); - root[Name] = std::move(obj); + json::Array Locs; + for (const SMLoc Loc : Def->getLoc()) + Locs.push_back(SrcMgr.getFormattedLocationNoOffset(Loc)); + Obj["!locs"] = std::move(Locs); - // Add this def to the instance list for each of its superclasses. - for (const auto &SuperPair : Def.getSuperClasses()) { - auto SuperName = SuperPair.first->getNameInitAsString(); - instance_lists[SuperName].push_back(Name); - } + Root[Name] = std::move(Obj); } // Make a JSON object from the std::map of instance lists. - json::Object instanceof; - for (auto kv: instance_lists) - instanceof[kv.first] = std::move(kv.second); - root["!instanceof"] = std::move(instanceof); + json::Object InstanceOf; + for (auto &[ClassName, Instances] : InstanceLists) + InstanceOf[ClassName] = std::move(Instances); + Root["!instanceof"] = std::move(InstanceOf); // Done. Write the output. - OS << json::Value(std::move(root)) << "\n"; + OS << json::Value(std::move(Root)) << "\n"; } -namespace llvm { - -void EmitJSON(RecordKeeper &RK, raw_ostream &OS) { JSONEmitter(RK).run(OS); } -} // end namespace llvm +void llvm::EmitJSON(const RecordKeeper &RK, raw_ostream &OS) { + JSONEmitter(RK).run(OS); +} diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 841fa7c3f3690..a4c41223c0762 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -131,13 +131,10 @@ int llvm::TableGenMain(const char *argv0, std::string OutString; raw_string_ostream Out(OutString); unsigned status = 0; - TableGen::Emitter::FnT ActionFn = TableGen::Emitter::Action->getValue(); - if (ActionFn) - ActionFn(Records, Out); - else if (MainFn) - status = MainFn(Out, Records); - else - return 1; + // ApplyCallback will return true if it did not apply any callback. In that + // case, attempt to apply the MainFn. + if (TableGen::Emitter::ApplyCallback(Records, Out)) + status = MainFn ? 
MainFn(Out, Records) : 1; Records.stopBackendTimer(); if (status) return 1; diff --git a/llvm/lib/TableGen/TableGenBackend.cpp b/llvm/lib/TableGen/TableGenBackend.cpp index 035abe936e114..210fff6545862 100644 --- a/llvm/lib/TableGen/TableGenBackend.cpp +++ b/llvm/lib/TableGen/TableGenBackend.cpp @@ -12,6 +12,8 @@ #include "llvm/TableGen/TableGenBackend.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include @@ -19,15 +21,53 @@ #include using namespace llvm; +using namespace TableGen::Emitter; const size_t MAX_LINE_LEN = 80U; -namespace llvm::TableGen::Emitter { -ManagedStatic, OptCreatorT> Action; -void *OptCreatorT::call() { - return new cl::opt(cl::desc("Action to perform:")); +// CommandLine options of class type are not directly supported with some +// specific exceptions like std::string which are safe to copy. In our case, +// the `FnT` function_ref object is also safe to copy. So provide a +// specialization of `OptionValue` for `FnT` type that stores it as a copy. +// This is essentially similar to OptionValue specialization for +// strings. +template <> struct cl::OptionValue final : cl::OptionValueCopy { + OptionValue() = default; + + OptionValue(const FnT &V) { this->setValue(V); } + + OptionValue &operator=(const FnT &V) { + setValue(V); + return *this; + } +}; + +namespace { +struct OptCreatorT { + static void *call() { + return new cl::opt(cl::desc("Action to perform:")); + } +}; +} // namespace + +static ManagedStatic, OptCreatorT> CallbackFunction; + +Opt::Opt(StringRef Name, FnT CB, StringRef Desc, bool ByDefault) { + if (ByDefault) + CallbackFunction->setInitialValue(CB); + CallbackFunction->getParser().addLiteralOption(Name, CB, Desc); +} + +/// Apply callback specified on the command line. Returns true if no callback +/// was applied. +bool llvm::TableGen::Emitter::ApplyCallback(RecordKeeper &Records, + raw_ostream &OS) { + FnT Fn = CallbackFunction->getValue(); + if (!Fn) + return true; + Fn(Records, OS); + return false; } -} // namespace llvm::TableGen::Emitter static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill, StringRef Suffix) { @@ -59,7 +99,7 @@ void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS, printLine(OS, Prefix + "Automatically generated file, do not edit!", ' ', Suffix); - // Print the filename of source file + // Print the filename of source file. if (!Record.getInputFilename().empty()) printLine( OS, Prefix + "From: " + sys::path::filename(Record.getInputFilename()), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5ac5b7f8a5ab1..215f30128e703 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1496,7 +1496,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::AVGCEILU, VT, Custom); if (!Subtarget->isLittleEndian()) - setOperationAction(ISD::BITCAST, VT, Expand); + setOperationAction(ISD::BITCAST, VT, Custom); if (Subtarget->hasSVE2() || (Subtarget->hasSME() && Subtarget->isStreaming())) @@ -1510,9 +1510,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); } - // Legalize unpacked bitcasts to REINTERPRET_CAST. 
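The TableGenBackend.cpp hunk above replaces direct access to the exported option with a registration/apply contract: each Opt object registers a named callback (optionally as the default), and ApplyCallback returns true when nothing was selected so that TableGenMain can fall back to MainFn. A stripped-down model of that contract, not LLVM's implementation; std::function and std::map stand in for cl::opt and ManagedStatic:

#include <functional>
#include <iostream>
#include <map>
#include <string>

using FnT = std::function<void(std::ostream &)>;

static std::map<std::string, FnT> &registry() {
  static std::map<std::string, FnT> R; // stands in for the cl::opt parser
  return R;
}
static FnT Selected; // what the command line (or a default) picked

struct Opt {
  Opt(const std::string &Name, FnT CB, bool ByDefault = false) {
    registry()[Name] = CB;
    if (ByDefault)
      Selected = CB;
  }
};

// Returns true if no callback was applied, mirroring the contract the new
// TableGenMain code relies on.
static bool ApplyCallback(std::ostream &OS) {
  if (!Selected)
    return true;
  Selected(OS);
  return false;
}

static Opt PrintRecords("print-records",
                        [](std::ostream &OS) { OS << "records...\n"; },
                        /*ByDefault=*/true);

int main() {
  if (ApplyCallback(std::cout))
    std::cerr << "no backend action selected\n";
  return 0;
}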
- for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16, - MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) + // Type legalize unpacked bitcasts. + for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32}) setOperationAction(ISD::BITCAST, VT, Custom); for (auto VT : @@ -1587,6 +1586,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) { + setOperationAction(ISD::BITCAST, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); @@ -1658,20 +1658,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setCondCodeAction(ISD::SETUGT, VT, Expand); setCondCodeAction(ISD::SETUEQ, VT, Expand); setCondCodeAction(ISD::SETONE, VT, Expand); - - if (!Subtarget->isLittleEndian()) - setOperationAction(ISD::BITCAST, VT, Expand); } for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { + setOperationAction(ISD::BITCAST, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); - - if (!Subtarget->isLittleEndian()) - setOperationAction(ISD::BITCAST, VT, Expand); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); @@ -4962,22 +4957,35 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op, return LowerFixedLengthBitcastToSVE(Op, DAG); if (OpVT.isScalableVector()) { - // Bitcasting between unpacked vector types of different element counts is - // not a NOP because the live elements are laid out differently. - // 01234567 - // e.g. nxv2i32 = XX??XX?? - // nxv4f16 = X?X?X?X? - if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount()) - return SDValue(); + assert(isTypeLegal(OpVT) && "Unexpected result type!"); - if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) { + // Handle type legalisation first. + if (!isTypeLegal(ArgVT)) { assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() && "Expected int->fp bitcast!"); + + // Bitcasting between unpacked vector types of different element counts is + // not a NOP because the live elements are laid out differently. + // 01234567 + // e.g. nxv2i32 = XX??XX?? + // nxv4f16 = X?X?X?X? + if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount()) + return SDValue(); + SDValue ExtResult = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT), Op.getOperand(0)); return getSVESafeBitCast(OpVT, ExtResult, DAG); } + + // Bitcasts between legal types with the same element count are legal. + if (OpVT.getVectorElementCount() == ArgVT.getVectorElementCount()) + return Op; + + // getSVESafeBitCast does not support casting between unpacked types. 
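The reason an unpacked-to-unpacked bitcast with a different element count cannot be a register NOP is the lane layout sketched in the comment (nxv2i32 = XX??XX??, nxv4f16 = X?X?X?X?). The tiny program below, my own illustration rather than LLVM code, just recomputes those occupancy pictures from the element size and the container size:

#include <cstdio>

// Which 16-bit slots of a 128-bit SVE granule hold live data when ElemBits
// of payload sit at the bottom of each ContainerBits-wide lane.
static void printLayout(const char *Name, int ElemBits, int ContainerBits) {
  char Slots[9] = "????????"; // 8 x 16-bit slots = 128 bits
  for (int Lane = 0; Lane * ContainerBits < 128; ++Lane)
    for (int S = 0; S < ElemBits / 16; ++S)
      Slots[Lane * ContainerBits / 16 + S] = 'X';
  std::printf("%-8s = %s\n", Name, Slots);
}

int main() {
  printLayout("nxv2i32", 32, 64); // XX??XX??
  printLayout("nxv4f16", 16, 32); // X?X?X?X?
}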
+ if (!isPackedVectorType(OpVT, DAG)) + return SDValue(); + return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG); } @@ -28906,7 +28914,22 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op, if (InVT != PackedInVT) Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op); - Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op); + if (Subtarget->isLittleEndian() || + PackedVT.getScalarSizeInBits() == PackedInVT.getScalarSizeInBits()) + Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op); + else { + EVT PackedVTAsInt = PackedVT.changeTypeToInteger(); + EVT PackedInVTAsInt = PackedInVT.changeTypeToInteger(); + + // Simulate the effect of casting through memory. + Op = DAG.getNode(ISD::BITCAST, DL, PackedInVTAsInt, Op); + if (PackedInVTAsInt.getScalarSizeInBits() != 8) + Op = DAG.getNode(ISD::BSWAP, DL, PackedInVTAsInt, Op); + Op = DAG.getNode(AArch64ISD::NVCAST, DL, PackedVTAsInt, Op); + if (PackedVTAsInt.getScalarSizeInBits() != 8) + Op = DAG.getNode(ISD::BSWAP, DL, PackedVTAsInt, Op); + Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op); + } // Unpack result if required. if (VT != PackedVT) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 2d41aff605a54..9fab886743241 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4851,7 +4851,9 @@ void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, + bool RenamableSrc) const { if (AArch64::GPR32spRegClass.contains(DestReg) && (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { const TargetRegisterInfo *TRI = &getRegisterInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 1580cdc0a2ba2..dbd820afb3be0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -344,7 +344,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { llvm::ArrayRef Indices) const; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 3f4651ea9c2b6..694b7fb2068a2 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2650,113 +2650,62 @@ let Predicates = [HasSVEorSME] in { sub_32)>; } - // FIXME: BigEndian requires an additional REV instruction to satisfy the - // constraint that none of the bits change when stored to memory as one - // type, and reloaded as another type. 
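The getSVESafeBitCast change makes the big-endian case honor bitcast's defining property, namely that the result must match storing the value as the source type and reloading it as the destination type, which is exactly the constraint the removed .td FIXME described; the BSWAP/NVCAST/BSWAP sequence emulates that memory round trip in registers. A tiny host-side reminder of the property itself (fixed-width arrays stand in for scalable vectors):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t Src[2] = {0x11223344, 0x55667788};
  uint16_t Dst[4];
  static_assert(sizeof(Src) == sizeof(Dst), "bitcast preserves total width");
  std::memcpy(Dst, Src, sizeof(Dst)); // "store as 2 x i32, reload as 4 x i16"
  for (uint16_t V : Dst)
    std::printf("0x%04x ", V); // on a little-endian host: 3344 1122 7788 5566
  std::printf("\n");
}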
- let Predicates = [IsLE] in { - def : Pat<(nxv16i8 (bitconvert nxv8i16:$src)), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert nxv4i32:$src)), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert nxv2i64:$src)), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert nxv8f16:$src)), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert nxv4f32:$src)), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert nxv2f64:$src)), (nxv16i8 ZPR:$src)>; - - def : Pat<(nxv8i16 (bitconvert nxv16i8:$src)), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert nxv4i32:$src)), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert nxv2i64:$src)), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert nxv8f16:$src)), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert nxv4f32:$src)), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert nxv2f64:$src)), (nxv8i16 ZPR:$src)>; - - def : Pat<(nxv4i32 (bitconvert nxv16i8:$src)), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert nxv8i16:$src)), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert nxv2i64:$src)), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert nxv8f16:$src)), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert nxv4f32:$src)), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert nxv2f64:$src)), (nxv4i32 ZPR:$src)>; - - def : Pat<(nxv2i64 (bitconvert nxv16i8:$src)), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert nxv8i16:$src)), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert nxv4i32:$src)), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert nxv8f16:$src)), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert nxv4f32:$src)), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert nxv2f64:$src)), (nxv2i64 ZPR:$src)>; - - def : Pat<(nxv8f16 (bitconvert nxv16i8:$src)), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert nxv8i16:$src)), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert nxv4i32:$src)), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert nxv2i64:$src)), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert nxv4f32:$src)), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert nxv2f64:$src)), (nxv8f16 ZPR:$src)>; - - def : Pat<(nxv4f32 (bitconvert nxv16i8:$src)), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert nxv8i16:$src)), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert nxv4i32:$src)), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert nxv2i64:$src)), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert nxv8f16:$src)), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert nxv2f64:$src)), (nxv4f32 ZPR:$src)>; - - def : Pat<(nxv2f64 (bitconvert nxv16i8:$src)), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert nxv8i16:$src)), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert nxv4i32:$src)), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert nxv2i64:$src)), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert nxv8f16:$src)), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert nxv4f32:$src)), (nxv2f64 ZPR:$src)>; - - def : Pat<(nxv8bf16 (bitconvert nxv16i8:$src)), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert nxv8i16:$src)), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert nxv4i32:$src)), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert nxv2i64:$src)), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert nxv8f16:$src)), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert nxv4f32:$src)), (nxv8bf16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert nxv2f64:$src)), (nxv8bf16 ZPR:$src)>; - - def : Pat<(nxv16i8 (bitconvert nxv8bf16:$src)), (nxv16i8 
ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert nxv8bf16:$src)), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert nxv8bf16:$src)), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert nxv8bf16:$src)), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert nxv8bf16:$src)), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert nxv8bf16:$src)), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert nxv8bf16:$src)), (nxv2f64 ZPR:$src)>; - - def : Pat<(nxv16i1 (bitconvert aarch64svcount:$src)), (nxv16i1 PPR:$src)>; - def : Pat<(aarch64svcount (bitconvert nxv16i1:$src)), (aarch64svcount PNR:$src)>; - } - - // These allow casting from/to unpacked predicate types. - def : Pat<(nxv16i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv16i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv8i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv4i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv2i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv2i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv2i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv2i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv1i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv1i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv1i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - def : Pat<(nxv1i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>; - - // These allow casting from/to unpacked floating-point types. 
- def : Pat<(nxv2f16 (reinterpret_cast nxv8f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8f16 (reinterpret_cast nxv2f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4f16 (reinterpret_cast nxv8f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8f16 (reinterpret_cast nxv4f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv2f32 (reinterpret_cast nxv4f32:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4f32 (reinterpret_cast nxv2f32:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv2bf16 (reinterpret_cast nxv8bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8bf16 (reinterpret_cast nxv2bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4bf16 (reinterpret_cast nxv8bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8bf16 (reinterpret_cast nxv4bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + // For big endian, only BITCASTs involving same sized vector types with same + // size vector elements can be isel'd directly. + let Predicates = [IsLE] in + foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in + foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in + if !ne(VT,VT2) then + def : Pat<(VT (bitconvert (VT2 ZPR:$src))), (VT ZPR:$src)>; + + def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; + + def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; + + def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>; + + def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + + def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>; + + def : Pat<(nxv4bf16 (bitconvert (nxv4f16 ZPR:$src))), (nxv4bf16 ZPR:$src)>; + def : Pat<(nxv4f16 (bitconvert (nxv4bf16 ZPR:$src))), (nxv4f16 ZPR:$src)>; + + def : Pat<(nxv2bf16 (bitconvert (nxv2f16 ZPR:$src))), (nxv2bf16 ZPR:$src)>; + def : Pat<(nxv2f16 (bitconvert (nxv2bf16 ZPR:$src))), (nxv2f16 ZPR:$src)>; + + def : Pat<(nxv16i1 (bitconvert (aarch64svcount PNR:$src))), (nxv16i1 PPR:$src)>; + def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PNR:$src)>; + + // These allow nop casting between predicate vector types. + foreach VT = [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ] in + foreach VT2 = [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ] in + def : Pat<(VT (reinterpret_cast (VT2 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + + // These allow nop casting between half vector types. + foreach VT = [ nxv2f16, nxv4f16, nxv8f16 ] in + foreach VT2 = [ nxv2f16, nxv4f16, nxv8f16 ] in + def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + + // These allow nop casting between float vector types. + foreach VT = [ nxv2f32, nxv4f32 ] in + foreach VT2 = [ nxv2f32, nxv4f32 ] in + def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + + // These allow nop casting between bfloat vector types. 
+ foreach VT = [ nxv2bf16, nxv4bf16, nxv8bf16 ] in + foreach VT2 = [ nxv2bf16, nxv4bf16, nxv8bf16 ] in + def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + + // These allow nop casting between all packed vector types. + foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in + foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in + def : Pat<(VT (AArch64NvCast (VT2 ZPR:$src))), (VT ZPR:$src)>; def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)), (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 1a10206eea237..3914f36338fa5 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3815,7 +3815,7 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( const MCOperand &Opr = Inst.getOperand(OperandIdx); return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) ? Opr.getReg() - : MCRegister::NoRegister; + : MCRegister(); }; // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. @@ -4753,7 +4753,7 @@ static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, if (!Op.isReg()) return -1; - unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); auto Reg = Sub ? Sub : Op.getReg(); const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); return AGPR32.contains(Reg) ? 1 : 0; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 1a0dc7098347a..b1da9da19c69b 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -382,7 +382,7 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx, if (!Op.isReg()) return false; - unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); auto Reg = Sub ? Sub : Op.getReg(); return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; } diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index de3c06f3a71e2..0fa8d4847931a 100644 --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -180,11 +180,8 @@ class R600EmitClauseMarkers : public MachineFunctionPass { MachineBasicBlock::iterator BBEnd) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); //TODO: change this to defs? - for (MachineInstr::const_mop_iterator - MOI = Def->operands_begin(), - MOE = Def->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef() || - TRI.isPhysRegLiveAcrossClauses(MOI->getReg())) + for (MachineOperand &MO : Def->all_defs()) { + if (TRI.isPhysRegLiveAcrossClauses(MO.getReg())) continue; // Def defines a clause local register, so check that its use will fit @@ -208,11 +205,11 @@ class R600EmitClauseMarkers : public MachineFunctionPass { // occur in the same basic block as its definition, because // it is illegal for the scheduler to schedule them in // different blocks. 
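The nested foreach loops in AArch64SVEInstrInfo.td replace the long hand-written pattern lists with a cross product over the type list, with the !ne(VT, VT2) guard filtering out the identity pairs for the bitconvert patterns. A C++ stand-in for the expansion the generator performs (the type names come from the list above):

#include <cstdio>
#include <cstring>

int main() {
  const char *Types[] = {"nxv16i8", "nxv8i16", "nxv4i32", "nxv2i64",
                         "nxv8f16", "nxv4f32", "nxv2f64", "nxv8bf16"};
  int N = 0;
  for (const char *VT : Types)
    for (const char *VT2 : Types)
      if (std::strcmp(VT, VT2) != 0) { // the !ne(VT, VT2) guard
        std::printf("Pat<(%s (bitconvert (%s ZPR:$src))), (%s ZPR:$src)>\n",
                    VT, VT2, VT);
        ++N;
      }
  std::printf("%d patterns\n", N); // 8 * 8 - 8 = 56
}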
- if (UseI->readsRegister(MOI->getReg(), &TRI)) + if (UseI->readsRegister(MO.getReg(), &TRI)) LastUseCount = AluInstCount; // Exit early if the current use kills the register - if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI)) + if (UseI != Def && UseI->killsRegister(MO.getReg(), &TRI)) break; } if (LastUseCount) diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index ff44454339268..d826ae8c5da23 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -40,7 +40,8 @@ bool R600InstrInfo::isVector(const MachineInstr &MI) const { void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { unsigned VectorComponents = 0; if ((R600::R600_Reg128RegClass.contains(DestReg) || R600::R600_Reg128VerticalRegClass.contains(DestReg)) && diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index f720e4656348c..c767ecb24590b 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -73,7 +73,8 @@ class R600InstrInfo final : public R600GenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override; diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp index eded8063feaaa..a2ce8ee361040 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -350,13 +350,9 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { Register DestReg = MI->getOperand(DstIndex).getReg(); // PressureRegister crashes if an operand is def and used in the same inst // and we try to constraint its regclass - for (MachineInstr::mop_iterator It = MI->operands_begin(), - E = MI->operands_end(); It != E; ++It) { - MachineOperand &MO = *It; - if (MO.isReg() && !MO.isDef() && - MO.getReg() == DestReg) + for (const MachineOperand &MO : MI->all_uses()) + if (MO.getReg() == DestReg) return; - } // Constrains the regclass of DestReg to assign it to Slot switch (Slot) { case 0: diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index eafe20be17d5b..8ae7f2910ec5a 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -883,10 +883,8 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, // can be used as the actual source after export patching, so // we need to treat them like sources and set the EXP_CNT // score. 
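Several hunks here (R600EmitClauseMarkers, R600MachineScheduler, SIInsertWaitcnts) rewrite manual operand loops as range-for over all_defs()/all_uses(), which pre-filter to register operands that are defs or uses respectively, so the isReg()/isDef() checks disappear from the loop bodies. A toy version of the idea, eagerly materialised for brevity where the LLVM helpers return lazy ranges:

#include <cstdio>
#include <vector>

struct Operand {
  bool IsReg, IsDef;
  int Reg;
};

static std::vector<Operand> allDefs(const std::vector<Operand> &Ops) {
  std::vector<Operand> Out;
  for (const Operand &MO : Ops)
    if (MO.IsReg && MO.IsDef) // the filter the old loop bodies did by hand
      Out.push_back(MO);
  return Out;
}

int main() {
  std::vector<Operand> Ops = {{true, true, 1}, {true, false, 2},
                              {false, false, 0}};
  for (const Operand &MO : allDefs(Ops))
    std::printf("def of reg %d\n", MO.Reg); // only reg 1
}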
- for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { - MachineOperand &DefMO = Inst.getOperand(I); - if (DefMO.isReg() && DefMO.isDef() && - TRI->isVGPR(*MRI, DefMO.getReg())) { + for (MachineOperand &DefMO : Inst.all_defs()) { + if (TRI->isVGPR(*MRI, DefMO.getReg())) { setRegScore( TRI->getEncodingValue(AMDGPU::getMCReg(DefMO.getReg(), *ST)), EXP_CNT, CurrScore); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 84d25a1fbd272..a857bdba53c3e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -794,7 +794,8 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg); unsigned Size = RI.getRegSizeInBits(*RC); const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index badfd91c0b972..4fd9b4366159b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -255,7 +255,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp index 9b5e45cb5fe97..78db68fca3050 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp +++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp @@ -281,7 +281,8 @@ unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB, void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { assert(ARC::GPR32RegClass.contains(SrcReg) && "Only GPR32 src copy supported."); assert(ARC::GPR32RegClass.contains(DestReg) && diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h index 1875aafbde826..e25f990252260 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.h +++ b/llvm/lib/Target/ARC/ARCInstrInfo.h @@ -65,7 +65,8 @@ class ARCInstrInfo : public ARCGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1199052ca97e9..49513fe10945a 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -892,7 +892,9 @@ void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) 
const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, + bool RenamableSrc) const { bool GPRDest = ARM::GPRRegClass.contains(DestReg); bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 8521e3ef91399..9e4e12a9a7441 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -209,7 +209,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp index 4878c73138940..2ab66da4b4d2d 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.cpp +++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp @@ -24,7 +24,7 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. - if (unsigned Reg = State.AllocateReg(RegList)) + if (MCRegister Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else { // For the 2nd half of a v2f64, do not fail. @@ -38,7 +38,7 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, } // Try to get the second register. - if (unsigned Reg = State.AllocateReg(RegList)) + if (MCRegister Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else State.addLoc(CCValAssign::getCustomMem( @@ -67,8 +67,8 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList); - if (Reg == 0) { + MCRegister Reg = State.AllocateReg(HiRegList, ShadowRegList); + if (!Reg) { // If we had R3 unallocated only, now we still must to waste it. 
Reg = State.AllocateReg(GPRArgRegs); @@ -89,7 +89,7 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, if (HiRegList[i] == Reg) break; - unsigned T = State.AllocateReg(LoRegList[i]); + MCRegister T = State.AllocateReg(LoRegList[i]); (void)T; assert(T == LoRegList[i] && "Could not allocate register"); @@ -116,8 +116,8 @@ static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList); - if (Reg == 0) + MCRegister Reg = State.AllocateReg(HiRegList, LoRegList); + if (!Reg) return false; // we didn't handle it unsigned i; @@ -287,7 +287,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT, static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State, ArrayRef RegList) { - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); if (Reg) { State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 4ab0433069ae6..853f54943eebf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2915,7 +2915,7 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, // Byval (as with any stack) slots are always at least 4 byte aligned. Alignment = std::max(Alignment, Align(4)); - unsigned Reg = State->AllocateReg(GPRArgRegs); + MCRegister Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 396328e958d18..a38aa3de40d90 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -42,7 +42,8 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { // Need to check the arch. 
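In the ARMCallingConv.cpp hunks the allocation results change type from unsigned to MCRegister and the Reg == 0 comparisons become !Reg; this works because register number 0 encodes "no register" and the handle converts to bool accordingly. A hypothetical handle type, just to show the pattern, not the real MCRegister:

#include <cstdio>

class RegHandle { // illustrative stand-in for MCRegister
  unsigned Id = 0; // 0 encodes "no register"
public:
  RegHandle() = default;
  explicit RegHandle(unsigned Id) : Id(Id) {}
  explicit operator bool() const { return Id != 0; }
  unsigned id() const { return Id; }
};

static RegHandle allocateReg(bool Available) {
  return Available ? RegHandle(3) : RegHandle();
}

int main() {
  if (RegHandle Reg = allocateReg(false); !Reg)
    std::printf("no register left; assign a stack location instead\n");
}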
MachineFunction &MF = *MBB.getParent(); const ARMSubtarget &st = MF.getSubtarget(); diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 984bec4e64490..84241fb8a9a66 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -39,7 +39,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 09bcd3109f2b3..d1e07b6703a5e 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -151,7 +151,8 @@ Thumb2InstrInfo::optimizeSelect(MachineInstr &MI, void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { // Handle SPR, DPR, and QPR copies. if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 8915da8c5bf3c..70ee3270e64ac 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -39,7 +39,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 18b7365fc5aa0..7b0f8d74e77c2 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -42,7 +42,8 @@ AVRInstrInfo::AVRInstrInfo(AVRSubtarget &STI) void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { const AVRRegisterInfo &TRI = *STI.getRegisterInfo(); unsigned Opc; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h index 28c0e0319d46e..8eb4292f2422d 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -75,7 +75,8 @@ class AVRInstrInfo : public AVRGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp index 2209f1f1462b4..1b07e7ffc0d31 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp +++ 
b/llvm/lib/Target/BPF/BPFInstrInfo.cpp @@ -31,7 +31,8 @@ BPFInstrInfo::BPFInstrInfo() void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { if (BPF::GPRRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(BPF::MOV_rr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h index 354aca1bd2f93..a6b6fd7dc4d96 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.h +++ b/llvm/lib/Target/BPF/BPFInstrInfo.h @@ -31,7 +31,8 @@ class BPFInstrInfo : public BPFGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index 6baca84ab3d0a..a2bb87bcaaf94 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -478,7 +478,8 @@ void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { if (CSKY::GPRRegClass.contains(SrcReg) && CSKY::CARRYRegClass.contains(DestReg)) { if (STI.hasE2()) { diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h index 4e3866b1188ca..54c1106310d85 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h @@ -55,7 +55,8 @@ class CSKYInstrInfo : public CSKYGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index e49169cff8aa8..2daa4f825c3b2 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -38,6 +38,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::log: case Intrinsic::log10: case Intrinsic::pow: + case Intrinsic::dx_all: case Intrinsic::dx_any: case Intrinsic::dx_clamp: case Intrinsic::dx_uclamp: @@ -54,8 +55,7 @@ static bool isIntrinsicExpansion(Function &F) { static Value *expandAbs(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Zero = Ty->isVectorTy() @@ -148,8 +148,7 @@ static Value *expandIntegerDotIntrinsic(CallInst *Orig, static Value *expandExpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant 
*Log2eConst = @@ -166,13 +165,21 @@ static Value *expandExpIntrinsic(CallInst *Orig) { return Exp2Call; } -static Value *expandAnyIntrinsic(CallInst *Orig) { +static Value *expandAnyOrAllIntrinsic(CallInst *Orig, + Intrinsic::ID intrinsicId) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); + auto ApplyOp = [&Builder](Intrinsic::ID IntrinsicId, Value *Result, + Value *Elt) { + if (IntrinsicId == Intrinsic::dx_any) + return Builder.CreateOr(Result, Elt); + assert(IntrinsicId == Intrinsic::dx_all); + return Builder.CreateAnd(Result, Elt); + }; + Value *Result = nullptr; if (!Ty->isVectorTy()) { Result = EltTy->isFloatingPointTy() @@ -193,7 +200,7 @@ static Value *expandAnyIntrinsic(CallInst *Orig) { Result = Builder.CreateExtractElement(Cond, (uint64_t)0); for (unsigned I = 1; I < XVec->getNumElements(); I++) { Value *Elt = Builder.CreateExtractElement(Cond, I); - Result = Builder.CreateOr(Result, Elt); + Result = ApplyOp(intrinsicId, Result, Elt); } } return Result; @@ -201,8 +208,7 @@ static Value *expandAnyIntrinsic(CallInst *Orig) { static Value *expandLengthIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); @@ -230,8 +236,7 @@ static Value *expandLerpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); Value *S = Orig->getOperand(2); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *V = Builder.CreateFSub(Y, X); V = Builder.CreateFMul(S, V); return Builder.CreateFAdd(X, V, "dx.lerp"); @@ -240,8 +245,7 @@ static Value *expandLerpIntrinsic(CallInst *Orig) { static Value *expandLogIntrinsic(CallInst *Orig, float LogConstVal = numbers::ln2f) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Ln2Const = @@ -266,8 +270,7 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Type *Ty = Orig->getType(); Type *EltTy = Ty->getScalarType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *XVec = dyn_cast(Ty); if (!XVec) { @@ -305,8 +308,7 @@ static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); Type *Ty = X->getType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *Log2Call = Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2"); @@ -350,8 +352,7 @@ static Value *expandClampIntrinsic(CallInst *Orig, Value *Min = Orig->getOperand(1); Value *Max = Orig->getOperand(2); Type *Ty = X->getType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *MaxCall = Builder.CreateIntrinsic( Ty, getMaxForClamp(Ty, ClampIntrinsic), {X, Min}, nullptr, "dx.max"); return Builder.CreateIntrinsic(Ty, getMinForClamp(Ty, ClampIntrinsic), @@ -360,7 +361,8 @@ static Value *expandClampIntrinsic(CallInst *Orig, static bool expandIntrinsic(Function &F, CallInst *Orig) { Value *Result = nullptr; - switch (F.getIntrinsicID()) { + Intrinsic::ID IntrinsicId = F.getIntrinsicID(); 
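expandAnyOrAllIntrinsic folds the per-element conditions with OR for dx_any and AND for dx_all via the ApplyOp lambda, and scalar inputs skip the loop entirely. The same reduction in plain C++, as a stand-alone model rather than the IRBuilder code:

#include <cstdio>
#include <vector>

static bool reduce(const std::vector<bool> &Cond, bool IsAll) {
  bool Result = Cond.front();              // extractelement 0
  for (size_t I = 1; I < Cond.size(); ++I) // remaining lanes
    Result = IsAll ? (Result && Cond[I]) : (Result || Cond[I]);
  return Result;
}

int main() {
  std::vector<bool> C = {true, false, true, true};
  std::printf("any = %d, all = %d\n", reduce(C, /*IsAll=*/false),
              reduce(C, /*IsAll=*/true)); // any = 1, all = 0
}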
+ switch (IntrinsicId) { case Intrinsic::abs: Result = expandAbs(Orig); break; @@ -376,12 +378,13 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::pow: Result = expandPowIntrinsic(Orig); break; + case Intrinsic::dx_all: case Intrinsic::dx_any: - Result = expandAnyIntrinsic(Orig); + Result = expandAnyOrAllIntrinsic(Orig, IntrinsicId); break; case Intrinsic::dx_uclamp: case Intrinsic::dx_clamp: - Result = expandClampIntrinsic(Orig, F.getIntrinsicID()); + Result = expandClampIntrinsic(Orig, IntrinsicId); break; case Intrinsic::dx_lerp: Result = expandLerpIntrinsic(Orig); @@ -397,7 +400,7 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { break; case Intrinsic::dx_sdot: case Intrinsic::dx_udot: - Result = expandIntegerDotIntrinsic(Orig, F.getIntrinsicID()); + Result = expandIntegerDotIntrinsic(Orig, IntrinsicId); break; } diff --git a/llvm/lib/Target/DirectX/DXILMetadata.cpp b/llvm/lib/Target/DirectX/DXILMetadata.cpp index ed0434ac98a18..1f5759c363013 100644 --- a/llvm/lib/Target/DirectX/DXILMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILMetadata.cpp @@ -284,6 +284,11 @@ void dxil::createEntryMD(Module &M, const uint64_t ShaderFlags) { EntryList.emplace_back(&F); } + // If there are no entries, do nothing. This is mostly to allow for writing + // tests with no actual entry functions. + if (EntryList.empty()) + return; + auto &Ctx = M.getContext(); // FIXME: generate metadata for resource. // See https://github.com/llvm/llvm-project/issues/57926. diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp index 7d2abb7078b8a..f282eff6c002b 100644 --- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp +++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp @@ -10,21 +10,248 @@ #include "DXILResourceAnalysis.h" #include "DirectX.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static void prettyPrintResources(raw_ostream &OS, +static StringRef getRCName(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: + return "SRV"; + case dxil::ResourceClass::UAV: + return "UAV"; + case dxil::ResourceClass::CBuffer: + return "cbuffer"; + case dxil::ResourceClass::Sampler: + return "sampler"; + } + llvm_unreachable("covered switch"); +} + +static StringRef getRCPrefix(dxil::ResourceClass RC) { + switch (RC) { + case dxil::ResourceClass::SRV: + return "t"; + case dxil::ResourceClass::UAV: + return "u"; + case dxil::ResourceClass::CBuffer: + return "cb"; + case dxil::ResourceClass::Sampler: + return "s"; + } +} + +static StringRef getFormatName(const dxil::ResourceInfo &RI) { + if (RI.isTyped()) { + switch (RI.getTyped().ElementTy) { + case dxil::ElementType::I1: + return "i1"; + case dxil::ElementType::I16: + return "i16"; + case dxil::ElementType::U16: + return "u16"; + case dxil::ElementType::I32: + return "i32"; + case dxil::ElementType::U32: + return "u32"; + case dxil::ElementType::I64: + return "i64"; + case dxil::ElementType::U64: + return "u64"; + case dxil::ElementType::F16: + return "f16"; + case dxil::ElementType::F32: + return "f32"; + case dxil::ElementType::F64: + return "f64"; + case dxil::ElementType::SNormF16: + return "snorm_f16"; + case dxil::ElementType::UNormF16: + return "unorm_f16"; + case dxil::ElementType::SNormF32: + return 
"snorm_f32"; + case dxil::ElementType::UNormF32: + return "unorm_f32"; + case dxil::ElementType::SNormF64: + return "snorm_f64"; + case dxil::ElementType::UNormF64: + return "unorm_f64"; + case dxil::ElementType::PackedS8x32: + return "p32i8"; + case dxil::ElementType::PackedU8x32: + return "p32u8"; + case dxil::ElementType::Invalid: + llvm_unreachable("Invalid ElementType"); + } + llvm_unreachable("Unhandled ElementType"); + } else if (RI.isStruct()) + return "struct"; + else if (RI.isCBuffer() || RI.isSampler()) + return "NA"; + return "byte"; +} + +static StringRef getTextureDimName(dxil::ResourceKind RK) { + switch (RK) { + case dxil::ResourceKind::Texture1D: + return "1d"; + case dxil::ResourceKind::Texture2D: + return "2d"; + case dxil::ResourceKind::Texture3D: + return "3d"; + case dxil::ResourceKind::TextureCube: + return "cube"; + case dxil::ResourceKind::Texture1DArray: + return "1darray"; + case dxil::ResourceKind::Texture2DArray: + return "2darray"; + case dxil::ResourceKind::TextureCubeArray: + return "cubearray"; + case dxil::ResourceKind::TBuffer: + return "tbuffer"; + case dxil::ResourceKind::FeedbackTexture2D: + return "fbtex2d"; + case dxil::ResourceKind::FeedbackTexture2DArray: + return "fbtex2darray"; + case dxil::ResourceKind::Texture2DMS: + return "2dMS"; + case dxil::ResourceKind::Texture2DMSArray: + return "2darrayMS"; + case dxil::ResourceKind::Invalid: + case dxil::ResourceKind::NumEntries: + case dxil::ResourceKind::CBuffer: + case dxil::ResourceKind::RawBuffer: + case dxil::ResourceKind::Sampler: + case dxil::ResourceKind::StructuredBuffer: + case dxil::ResourceKind::TypedBuffer: + case dxil::ResourceKind::RTAccelerationStructure: + llvm_unreachable("Invalid ResourceKind for texture"); + } + llvm_unreachable("Unhandled ResourceKind"); +} + +namespace { +struct FormatResourceDimension + : public llvm::FormatAdapter { + explicit FormatResourceDimension(const dxil::ResourceInfo &RI) + : llvm::FormatAdapter(RI) {} + + void format(llvm::raw_ostream &OS, StringRef Style) override { + dxil::ResourceKind RK = Item.getResourceKind(); + switch (RK) { + default: { + OS << getTextureDimName(RK); + if (Item.isMultiSample()) + OS << Item.getMultiSample().Count; + break; + } + case dxil::ResourceKind::RawBuffer: + case dxil::ResourceKind::StructuredBuffer: + if (!Item.isUAV()) + OS << "r/o"; + else if (Item.getUAV().HasCounter) + OS << "r/w+cnt"; + else + OS << "r/w"; + break; + case dxil::ResourceKind::TypedBuffer: + OS << "buf"; + break; + case dxil::ResourceKind::RTAccelerationStructure: + // TODO: dxc would print "ras" here. Can/should this happen? 
+ llvm_unreachable("RTAccelerationStructure printing is not implemented"); + } + } +}; + +struct FormatBindingID + : public llvm::FormatAdapter { + explicit FormatBindingID(const dxil::ResourceInfo &RI) + : llvm::FormatAdapter(RI) {} + + void format(llvm::raw_ostream &OS, StringRef Style) override { + OS << getRCPrefix(Item.getResourceClass()).upper() + << Item.getBinding().RecordID; + } +}; + +struct FormatBindingLocation + : public llvm::FormatAdapter { + explicit FormatBindingLocation(const dxil::ResourceInfo &RI) + : llvm::FormatAdapter(RI) {} + + void format(llvm::raw_ostream &OS, StringRef Style) override { + const auto &Binding = Item.getBinding(); + OS << getRCPrefix(Item.getResourceClass()) << Binding.LowerBound; + if (Binding.Space) + OS << ",space" << Binding.Space; + } +}; + +struct FormatBindingSize + : public llvm::FormatAdapter { + explicit FormatBindingSize(const dxil::ResourceInfo &RI) + : llvm::FormatAdapter(RI) {} + + void format(llvm::raw_ostream &OS, StringRef Style) override { + uint32_t Size = Item.getBinding().Size; + if (Size == std::numeric_limits::max()) + OS << "unbounded"; + else + OS << Size; + } +}; + +} // namespace + +static void prettyPrintResources(raw_ostream &OS, const DXILResourceMap &DRM, const dxil::Resources &MDResources) { - MDResources.print(OS); + // Column widths are arbitrary but match the widths DXC uses. + OS << ";\n; Resource Bindings:\n;\n"; + OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,9}\n", "Name", + "Type", "Format", "Dim", "ID", "HLSL Bind", "Count"); + OS << formatv( + "; {0,-+30} {1,-+10} {2,-+7} {3,-+11} {4,-+7} {5,-+14} {6,-+9}\n", "", "", + "", "", "", "", ""); + + // TODO: Do we want to sort these by binding or something like that? + for (const dxil::ResourceInfo &RI : DRM) { + dxil::ResourceClass RC = RI.getResourceClass(); + assert((RC != dxil::ResourceClass::CBuffer || !MDResources.hasCBuffers()) && + "Old and new cbuffer representations can't coexist"); + assert((RC != dxil::ResourceClass::UAV || !MDResources.hasUAVs()) && + "Old and new UAV representations can't coexist"); + + StringRef Name(RI.getName()); + StringRef Type(getRCName(RC)); + StringRef Format(getFormatName(RI)); + FormatResourceDimension Dim(RI); + FormatBindingID ID(RI); + FormatBindingLocation Bind(RI); + FormatBindingSize Count(RI); + OS << formatv("; {0,-30} {1,10} {2,7} {3,11} {4,7} {5,14} {6,9}\n", Name, + Type, Format, Dim, ID, Bind, Count); + } + + if (MDResources.hasCBuffers()) + MDResources.printCBuffers(OS); + if (MDResources.hasUAVs()) + MDResources.printUAVs(OS); + + OS << ";\n"; } PreservedAnalyses DXILPrettyPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { + const DXILResourceMap &DRM = MAM.getResult(M); const dxil::Resources &MDResources = MAM.getResult(M); - prettyPrintResources(OS, MDResources); + prettyPrintResources(OS, DRM, MDResources); return PreservedAnalyses::all(); } @@ -49,6 +276,7 @@ class DXILPrettyPrinterLegacy : public llvm::ModulePass { bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); } }; @@ -57,13 +285,16 @@ class DXILPrettyPrinterLegacy : public llvm::ModulePass { char DXILPrettyPrinterLegacy::ID = 0; INITIALIZE_PASS_BEGIN(DXILPrettyPrinterLegacy, "dxil-pretty-printer", "DXIL Metadata Pretty Printer", true, true) +INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_DEPENDENCY(DXILResourceMDWrapper) INITIALIZE_PASS_END(DXILPrettyPrinterLegacy, "dxil-pretty-printer", "DXIL Metadata 
Pretty Printer", true, true) bool DXILPrettyPrinterLegacy::runOnModule(Module &M) { + const DXILResourceMap &DRM = + getAnalysis().getResourceMap(); dxil::Resources &Res = getAnalysis().getDXILResource(); - Res.print(OS); + prettyPrintResources(OS, DRM, Res); return false; } diff --git a/llvm/lib/Target/DirectX/DXILResource.cpp b/llvm/lib/Target/DirectX/DXILResource.cpp index 8e5b9867e6661..f027283b70521 100644 --- a/llvm/lib/Target/DirectX/DXILResource.cpp +++ b/llvm/lib/Target/DirectX/DXILResource.cpp @@ -333,37 +333,14 @@ template MDNode *ResourceTable::write(Module &M) const { return MDNode::get(M.getContext(), MDs); } -void Resources::write(Module &M) const { - Metadata *ResourceMDs[4] = {nullptr, nullptr, nullptr, nullptr}; - - ResourceMDs[1] = UAVs.write(M); - - ResourceMDs[2] = CBuffers.write(M); - - bool HasResource = ResourceMDs[0] != nullptr || ResourceMDs[1] != nullptr || - ResourceMDs[2] != nullptr || ResourceMDs[3] != nullptr; - - if (HasResource) { - NamedMDNode *DXResMD = M.getOrInsertNamedMetadata("dx.resources"); - DXResMD->addOperand(MDNode::get(M.getContext(), ResourceMDs)); - } - - NamedMDNode *Entry = M.getNamedMetadata("hlsl.uavs"); - if (Entry) - Entry->eraseFromParent(); +Metadata *Resources::writeUAVs(Module &M) const { return UAVs.write(M); } +void Resources::printUAVs(raw_ostream &OS) const { UAVs.print(OS); } +Metadata *Resources::writeCBuffers(Module &M) const { + return CBuffers.write(M); } +void Resources::printCBuffers(raw_ostream &OS) const { CBuffers.print(OS); } -void Resources::print(raw_ostream &O) const { - O << ";\n" - << "; Resource Bindings:\n" - << ";\n" - << "; Name Type Format Dim " - "ID HLSL Bind Count\n" - << "; ------------------------------ ---------- ------- ----------- " - "------- -------------- ------\n"; - - CBuffers.print(O); - UAVs.print(O); +void Resources::dump() const { + printCBuffers(dbgs()); + printUAVs(dbgs()); } - -void Resources::dump() const { print(dbgs()); } diff --git a/llvm/lib/Target/DirectX/DXILResource.h b/llvm/lib/Target/DirectX/DXILResource.h index 06902fe2b87b0..812729bc4dc57 100644 --- a/llvm/lib/Target/DirectX/DXILResource.h +++ b/llvm/lib/Target/DirectX/DXILResource.h @@ -103,6 +103,7 @@ template class ResourceTable { public: ResourceTable(StringRef Name) : MDName(Name) {} void collect(Module &M); + bool empty() const { return Data.empty(); } MDNode *write(Module &M) const; void print(raw_ostream &O) const; }; @@ -117,8 +118,12 @@ class Resources { public: void collect(Module &M); - void write(Module &M) const; - void print(raw_ostream &O) const; + bool hasUAVs() const { return !UAVs.empty(); } + Metadata *writeUAVs(Module &M) const; + void printUAVs(raw_ostream &OS) const; + bool hasCBuffers() const { return !CBuffers.empty(); } + Metadata *writeCBuffers(Module &M) const; + void printCBuffers(raw_ostream &OS) const; LLVM_DUMP_METHOD void dump() const; }; diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 55b3d94568426..2c6d20112060d 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -13,16 +13,59 @@ #include "DXILShaderFlags.h" #include "DirectX.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; using namespace llvm::dxil; -static void 
translateMetadata(Module &M, const dxil::Resources &MDResources, +static void emitResourceMetadata(Module &M, const DXILResourceMap &DRM, + const dxil::Resources &MDResources) { + LLVMContext &Context = M.getContext(); + + SmallVector SRVs, UAVs, CBufs, Smps; + for (const ResourceInfo &RI : DRM.srvs()) + SRVs.push_back(RI.getAsMetadata(Context)); + for (const ResourceInfo &RI : DRM.uavs()) + UAVs.push_back(RI.getAsMetadata(Context)); + for (const ResourceInfo &RI : DRM.cbuffers()) + CBufs.push_back(RI.getAsMetadata(Context)); + for (const ResourceInfo &RI : DRM.samplers()) + Smps.push_back(RI.getAsMetadata(Context)); + + Metadata *SRVMD = SRVs.empty() ? nullptr : MDNode::get(Context, SRVs); + Metadata *UAVMD = UAVs.empty() ? nullptr : MDNode::get(Context, UAVs); + Metadata *CBufMD = CBufs.empty() ? nullptr : MDNode::get(Context, CBufs); + Metadata *SmpMD = Smps.empty() ? nullptr : MDNode::get(Context, Smps); + bool HasResources = !DRM.empty(); + + if (MDResources.hasUAVs()) { + assert(!UAVMD && "Old and new UAV representations can't coexist"); + UAVMD = MDResources.writeUAVs(M); + HasResources = true; + } + + if (MDResources.hasCBuffers()) { + assert(!CBufMD && "Old and new cbuffer representations can't coexist"); + CBufMD = MDResources.writeCBuffers(M); + HasResources = true; + } + + if (!HasResources) + return; + + NamedMDNode *ResourceMD = M.getOrInsertNamedMetadata("dx.resources"); + ResourceMD->addOperand( + MDNode::get(M.getContext(), {SRVMD, UAVMD, CBufMD, SmpMD})); +} + +static void translateMetadata(Module &M, const DXILResourceMap &DRM, + const dxil::Resources &MDResources, const ComputedShaderFlags &ShaderFlags) { dxil::ValidatorVersionMD ValVerMD(M); if (ValVerMD.isEmpty()) @@ -30,18 +73,19 @@ static void translateMetadata(Module &M, const dxil::Resources &MDResources, dxil::createShaderModelMD(M); dxil::createDXILVersionMD(M); - MDResources.write(M); + emitResourceMetadata(M, DRM, MDResources); dxil::createEntryMD(M, static_cast(ShaderFlags)); } PreservedAnalyses DXILTranslateMetadata::run(Module &M, ModuleAnalysisManager &MAM) { + const DXILResourceMap &DRM = MAM.getResult(M); const dxil::Resources &MDResources = MAM.getResult(M); const ComputedShaderFlags &ShaderFlags = MAM.getResult(M); - translateMetadata(M, MDResources, ShaderFlags); + translateMetadata(M, DRM, MDResources, ShaderFlags); return PreservedAnalyses::all(); } @@ -56,17 +100,20 @@ class DXILTranslateMetadataLegacy : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } bool runOnModule(Module &M) override { + const DXILResourceMap &DRM = + getAnalysis().getResourceMap(); const dxil::Resources &MDResources = getAnalysis().getDXILResource(); const ComputedShaderFlags &ShaderFlags = getAnalysis().getShaderFlags(); - translateMetadata(M, MDResources, ShaderFlags); + translateMetadata(M, DRM, MDResources, ShaderFlags); return true; } }; @@ -81,6 +128,7 @@ ModulePass *llvm::createDXILTranslateMetadataLegacyPass() { INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata", "DXIL Translate Metadata", false, false) +INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_DEPENDENCY(DXILResourceMDWrapper) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata", diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3f6de365fe393..7c77bf2b31b8e 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -857,7 +857,9 @@ static void getLiveOutRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) { void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, + bool RenamableSrc) const { const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); unsigned KillFlag = getKillRegState(KillSrc); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 4efc62fd717c6..854c3694ceba7 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -174,7 +174,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// large registers. See for example the ARM target. void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index 9f3d9b7aaa6f9..06ef5d114f455 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -35,8 +35,8 @@ void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator Position, const DebugLoc &DL, MCRegister DestinationRegister, - MCRegister SourceRegister, - bool KillSource) const { + MCRegister SourceRegister, bool KillSource, + bool RenamableDest, bool RenamableSrc) const { if (!Lanai::GPRRegClass.contains(DestinationRegister, SourceRegister)) { llvm_unreachable("Impossible reg-to-reg copy"); } diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h index 8ad2b9237c928..2630464f0a76f 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -49,7 +49,9 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator Position, const DebugLoc &DL, MCRegister DestinationRegister, - MCRegister SourceRegister, bool KillSource) const override; + MCRegister SourceRegister, bool KillSource, + bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator Position, diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index f52e188f87792..c2ae4a0734b6a 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -1314,8 +1314,8 @@ void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, // expands to: // pcaddu18i $rj, %call36(sym) // jirl $r0, $rj, 0 - unsigned ScratchReg = - IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; + MCRegister ScratchReg = + IsTailCall ? Inst.getOperand(0).getReg() : MCRegister(LoongArch::R1); const MCExpr *Sym = IsTailCall ? 
Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); const LoongArchMCExpr *LE = LoongArchMCExpr::create( @@ -1326,7 +1326,7 @@ void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, getSTI()); Out.emitInstruction( MCInstBuilder(LoongArch::JIRL) - .addReg(IsTailCall ? (unsigned)LoongArch::R0 : ScratchReg) + .addReg(IsTailCall ? MCRegister(LoongArch::R0) : ScratchReg) .addReg(ScratchReg) .addImm(0), getSTI()); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 50c6c263e966b..95c1b150722f6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -5012,7 +5012,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31}; - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -5023,7 +5023,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, // fs0,fs1,fs2,fs3 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, LoongArch::F26, LoongArch::F27}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -5034,7 +5034,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, // fs4,fs5,fs6,fs7 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, LoongArch::F30_64, LoongArch::F31_64}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 90d94e96b0efd..9059da460f135 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -39,7 +39,9 @@ MCInst LoongArchInstrInfo::getNop() const { void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, + bool RenamableSrc) const { if (LoongArch::GPRRegClass.contains(DstReg, SrcReg)) { BuildMI(MBB, MBBI, DL, get(LoongArch::OR), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)) diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index d66b2cb8efb33..ef9970783107e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -31,7 +31,8 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index 338db45782c96..23c5c76a47479 100644 --- 
a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -663,7 +663,8 @@ bool M68kInstrInfo::isPCRelRegisterOperandLegal( void M68kInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DstReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { unsigned Opc = 0; // First deal with the normal symmetric copies. diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h index d1e1e1cd99987..5d81956d89fdf 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.h +++ b/llvm/lib/Target/M68k/M68kInstrInfo.h @@ -271,7 +271,8 @@ class M68kInstrInfo : public M68kGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; bool getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index ba7b6c85bd81a..1c7a14464d7bb 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -527,7 +527,7 @@ static void AnalyzeArguments(CCState &State, if (!UsedStack && Parts == 2 && RegsLeft == 1) { // Special case for 32-bit register split, see EABI section 3.3.3 - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo)); RegsLeft -= 1; @@ -535,7 +535,7 @@ static void AnalyzeArguments(CCState &State, CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State); } else if (Parts <= RegsLeft) { for (unsigned j = 0; j < Parts; j++) { - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo)); RegsLeft--; } diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 7405716516643..ae1228ceaa4e3 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -90,7 +90,8 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { unsigned Opc; if (MSP430::GR16RegClass.contains(DestReg, SrcReg)) Opc = MSP430::MOV16rr; diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h index b8d015a21cd15..113a22318bec5 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h @@ -37,7 +37,8 @@ class MSP430InstrInfo : public MSP430GenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp 
index 076e0a20cb97e..c50c2063ee8ed 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -5782,9 +5782,9 @@ bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, sel = 3; break; } - unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; - unsigned Op1 = - IsMFTR ? rd + MCRegister Op0 = IsMFTR ? Inst.getOperand(0).getReg() : MCRegister(rd); + MCRegister Op1 = + IsMFTR ? MCRegister(rd) : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() : Inst.getOperand(0).getReg()); diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp index 30ac96936de28..1bc1ed7ab93e3 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp @@ -69,7 +69,8 @@ Register Mips16InstrInfo::isStoreToStackSlot(const MachineInstr &MI, void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { unsigned Opc = 0; if (Mips::CPU16RegsRegClass.contains(DestReg) && diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.h b/llvm/lib/Target/Mips/Mips16InstrInfo.h index e8567ee3b9ce5..8e73c8079b0f8 100644 --- a/llvm/lib/Target/Mips/Mips16InstrInfo.h +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.h @@ -50,7 +50,8 @@ class Mips16InstrInfo : public MipsInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index b2ba0f8fe74dc..311b73710fb7a 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -1631,28 +1631,26 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { void MipsConstantIslands::prescanForConstants() { for (MachineBasicBlock &B : *MF) { - for (MachineBasicBlock::instr_iterator I = B.instr_begin(), - EB = B.instr_end(); - I != EB; ++I) { - switch(I->getDesc().getOpcode()) { + for (MachineInstr &MI : B) { + switch (MI.getDesc().getOpcode()) { case Mips::LwConstant32: { PrescannedForConstants = true; - LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); - LLVM_DEBUG(dbgs() << "num operands " << I->getNumOperands() << "\n"); - MachineOperand &Literal = I->getOperand(1); + LLVM_DEBUG(dbgs() << "constant island constant " << MI << "\n"); + LLVM_DEBUG(dbgs() << "num operands " << MI.getNumOperands() << "\n"); + MachineOperand &Literal = MI.getOperand(1); if (Literal.isImm()) { int64_t V = Literal.getImm(); LLVM_DEBUG(dbgs() << "literal " << V << "\n"); Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, V); unsigned index = MCP->getConstantPoolIndex(C, Align(4)); - I->getOperand(2).ChangeToImmediate(index); - LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); - I->setDesc(TII->get(Mips::LwRxPcTcp16)); - I->removeOperand(1); - I->removeOperand(1); - I->addOperand(MachineOperand::CreateCPI(index, 0)); - I->addOperand(MachineOperand::CreateImm(4)); + MI.getOperand(2).ChangeToImmediate(index); + LLVM_DEBUG(dbgs() << 
"constant island constant " << MI << "\n"); + MI.setDesc(TII->get(Mips::LwRxPcTcp16)); + MI.removeOperand(1); + MI.removeOperand(1); + MI.addOperand(MachineOperand::CreateCPI(index, 0)); + MI.addOperand(MachineOperand::CreateImm(4)); } break; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0f2047fcac640..fa57a3fa9b155 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -370,6 +370,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Expand); setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); + } else { + setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom); } if (Subtarget.isGP64bit()) { @@ -1251,6 +1254,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::VAARG: return lowerVAARG(Op, DAG); case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); case ISD::FABS: return lowerFABS(Op, DAG); + case ISD::FCANONICALIZE: + return lowerFCANONICALIZE(Op, DAG); case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); @@ -2520,6 +2525,20 @@ SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const { return lowerFABS32(Op, DAG, Subtarget.hasExtractInsert()); } +SDValue MipsTargetLowering::lowerFCANONICALIZE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Operand = Op.getOperand(0); + SDNodeFlags Flags = Op->getFlags(); + + if (Flags.hasNoNaNs() || DAG.isKnownNeverNaN(Operand)) + return Operand; + + SDValue Quiet = DAG.getNode(ISD::FADD, DL, VT, Operand, Operand); + return DAG.getSelectCC(DL, Operand, Operand, Quiet, Operand, ISD::SETUO); +} + SDValue MipsTargetLowering:: lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth @@ -2991,7 +3010,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, } else { Reg = State.AllocateReg(F64Regs); // Shadow int registers - unsigned Reg2 = State.AllocateReg(IntRegs); + MCRegister Reg2 = State.AllocateReg(IntRegs); if (Reg2 == Mips::A1 || Reg2 == Mips::A3) State.AllocateReg(IntRegs); State.AllocateReg(IntRegs); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 84ad40d6bbbe2..2b18b29918092 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -550,6 +550,7 @@ class TargetRegisterClass; bool HasExtractInsert) const; SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasExtractInsert) const; + SDValue lowerFCANONICALIZE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index b99ddfab2a47d..87e9ef1c26420 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -83,7 +83,8 @@ Register MipsSEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool 
RenamableDest, bool RenamableSrc) const { unsigned Opc = 0, ZeroReg = 0; bool isMicroMips = Subtarget.inMicroMipsMode(); diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/llvm/lib/Target/Mips/MipsSEInstrInfo.h index a8855e26ad10f..36bddba10410c 100644 --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.h @@ -44,7 +44,8 @@ class MipsSEInstrInfo : public MipsInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 673858f92e7ce..bec40874c8948 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -32,7 +32,8 @@ NVPTXInstrInfo::NVPTXInstrInfo() : RegInfo() {} void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index d6cbeae6984c9..f674a00bc351b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -53,7 +53,8 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; // Branch analysis. bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index d866ef6b88a1d..cd4c76013d204 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -54,7 +54,6 @@ add_llvm_target(PowerPCCodeGen PPCReduceCRLogicals.cpp PPCVSXFMAMutate.cpp PPCVSXSwapRemoval.cpp - PPCExpandISEL.cpp PPCPreEmitPeephole.cpp PPCLowerMASSVEntries.cpp PPCGenScalarMASSEntries.cpp diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp index 188fc96bc7c2a..d5077ab279651 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp @@ -151,7 +151,7 @@ static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 }; // Try to get the first register. 
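Throughout the calling-convention helpers touched in this patch, the result of CCState::AllocateReg is now held as MCRegister instead of unsigned. The call sites otherwise stay the same because MCRegister is a thin wrapper around the physical register number: it converts back to unsigned, so the usual "did we get a register?" truthiness test and comparisons against MCPhysReg values keep working. Below is a minimal sketch of the idiom, assuming a made-up GPRList and an arbitrary 8-byte stack slot; it is illustrative only, not code from the patch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Assign one integer argument either to the next free register in GPRList
// or to a stack slot. GPRList is a placeholder; the CCState/CCValAssign
// calls are the real API used by the CC_* helpers in this patch.
static bool assignIntArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo, CCState &State,
                         ArrayRef<MCPhysReg> GPRList) {
  // AllocateReg returns an invalid MCRegister (register number 0) when the
  // list is exhausted, so the familiar `if (Reg)` test reads unchanged.
  if (MCRegister Reg = State.AllocateReg(GPRList)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false; // argument fully handled in a register
  }
  // Otherwise fall back to the stack (8-byte slot purely for illustration).
  unsigned Offset = State.AllocateStack(8, Align(8));
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return false;
}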
- unsigned Reg = State.AllocateReg(HiRegList); + MCRegister Reg = State.AllocateReg(HiRegList); if (!Reg) return false; @@ -160,7 +160,7 @@ static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, if (HiRegList[i] == Reg) break; - unsigned T = State.AllocateReg(LoRegList[i]); + MCRegister T = State.AllocateReg(LoRegList[i]); (void)T; assert(T == LoRegList[i] && "Could not allocate register"); @@ -180,7 +180,7 @@ static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT, static const MCPhysReg LoRegList[] = { PPC::R4 }; // Try to get the first register. - unsigned Reg = State.AllocateReg(HiRegList, LoRegList); + MCRegister Reg = State.AllocateReg(HiRegList, LoRegList); if (!Reg) return false; diff --git a/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp deleted file mode 100644 index 4c74e82cf0412..0000000000000 --- a/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp +++ /dev/null @@ -1,491 +0,0 @@ -//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A pass that expands the ISEL instruction into an if-then-else sequence. -// This pass must be run post-RA since all operands must be physical registers. -// -//===----------------------------------------------------------------------===// - -#include "PPC.h" -#include "PPCInstrInfo.h" -#include "PPCSubtarget.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "ppc-expand-isel" - -STATISTIC(NumExpanded, "Number of ISEL instructions expanded"); -STATISTIC(NumRemoved, "Number of ISEL instructions removed"); -STATISTIC(NumFolded, "Number of ISEL instructions folded"); - -// If -ppc-gen-isel=false is set, we will disable generating the ISEL -// instruction on all PPC targets. Otherwise, if the user set option -// -misel or the platform supports ISEL by default, still generate the -// ISEL instruction, else expand it. -static cl::opt - GenerateISEL("ppc-gen-isel", - cl::desc("Enable generating the ISEL instruction."), - cl::init(true), cl::Hidden); - -namespace { -class PPCExpandISEL : public MachineFunctionPass { - DebugLoc dl; - MachineFunction *MF; - const TargetInstrInfo *TII; - bool IsTrueBlockRequired; - bool IsFalseBlockRequired; - MachineBasicBlock *TrueBlock; - MachineBasicBlock *FalseBlock; - MachineBasicBlock *NewSuccessor; - MachineBasicBlock::iterator TrueBlockI; - MachineBasicBlock::iterator FalseBlockI; - - typedef SmallVector BlockISELList; - typedef SmallDenseMap ISELInstructionList; - - // A map of MBB numbers to their lists of contained ISEL instructions. - // Please note when we traverse this list and expand ISEL, we only remove - // the ISEL from the MBB not from this list. - ISELInstructionList ISELInstructions; - - /// Initialize the object. 
- void initialize(MachineFunction &MFParam); - - void handleSpecialCases(BlockISELList &BIL, MachineBasicBlock *MBB); - void reorganizeBlockLayout(BlockISELList &BIL, MachineBasicBlock *MBB); - void populateBlocks(BlockISELList &BIL); - void expandMergeableISELs(BlockISELList &BIL); - void expandAndMergeISELs(); - - bool canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI); - - /// Is this instruction an ISEL or ISEL8? - static bool isISEL(const MachineInstr &MI) { - return (MI.getOpcode() == PPC::ISEL || MI.getOpcode() == PPC::ISEL8); - } - - /// Is this instruction an ISEL8? - static bool isISEL8(const MachineInstr &MI) { - return (MI.getOpcode() == PPC::ISEL8); - } - - /// Are the two operands using the same register? - bool useSameRegister(const MachineOperand &Op1, const MachineOperand &Op2) { - return (Op1.getReg() == Op2.getReg()); - } - - /// - /// Collect all ISEL instructions from the current function. - /// - /// Walk the current function and collect all the ISEL instructions that are - /// found. The instructions are placed in the ISELInstructions vector. - /// - /// \return true if any ISEL instructions were found, false otherwise - /// - bool collectISELInstructions(); - -public: - static char ID; - PPCExpandISEL() : MachineFunctionPass(ID) { - initializePPCExpandISELPass(*PassRegistry::getPassRegistry()); - } - - /// - /// Determine whether to generate the ISEL instruction or expand it. - /// - /// Expand ISEL instruction into if-then-else sequence when one of - /// the following two conditions hold: - /// (1) -ppc-gen-isel=false - /// (2) hasISEL() return false - /// Otherwise, still generate ISEL instruction. - /// The -ppc-gen-isel option is set to true by default. Which means the ISEL - /// instruction is still generated by default on targets that support them. - /// - /// \return true if ISEL should be expanded into if-then-else code sequence; - /// false if ISEL instruction should be generated, i.e. not expanded. - /// - static bool isExpandISELEnabled(const MachineFunction &MF); - -#ifndef NDEBUG - void DumpISELInstructions() const; -#endif - - bool runOnMachineFunction(MachineFunction &MF) override { - LLVM_DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n"); - initialize(MF); - - if (!collectISELInstructions()) { - LLVM_DEBUG(dbgs() << "No ISEL instructions in this function\n"); - return false; - } - -#ifndef NDEBUG - DumpISELInstructions(); -#endif - - expandAndMergeISELs(); - - return true; - } -}; -} // end anonymous namespace - -void PPCExpandISEL::initialize(MachineFunction &MFParam) { - MF = &MFParam; - TII = MF->getSubtarget().getInstrInfo(); - ISELInstructions.clear(); -} - -bool PPCExpandISEL::isExpandISELEnabled(const MachineFunction &MF) { - return !GenerateISEL || !MF.getSubtarget().hasISEL(); -} - -bool PPCExpandISEL::collectISELInstructions() { - for (MachineBasicBlock &MBB : *MF) { - BlockISELList thisBlockISELs; - for (MachineInstr &MI : MBB) - if (isISEL(MI)) - thisBlockISELs.push_back(&MI); - if (!thisBlockISELs.empty()) - ISELInstructions.insert(std::make_pair(MBB.getNumber(), thisBlockISELs)); - } - return !ISELInstructions.empty(); -} - -#ifndef NDEBUG -void PPCExpandISEL::DumpISELInstructions() const { - for (const auto &I : ISELInstructions) { - LLVM_DEBUG(dbgs() << printMBBReference(*MF->getBlockNumbered(I.first)) - << ":\n"); - for (const auto &VI : I.second) - LLVM_DEBUG(dbgs() << " "; VI->print(dbgs())); - } -} -#endif - -/// Contiguous ISELs that have the same condition can be merged. 
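For readers skimming this deletion: PPCExpandISEL rewrote each post-RA `isel` (a conditional register select) into a small if-then-else diamond, after first handling the degenerate cases described further down in the removed file (an isel whose three registers are identical is simply deleted, and one whose true and false inputs match is folded to an OR copy). The snippet below is ordinary C++ written only to illustrate the select semantics and where the expansion's ADDI/ORI copies landed; it is not code from the pass.

// Illustration only: the value an isel computes and where the deleted
// pass's expansion placed each copy. "CRBit" stands for the condition
// register bit operand (operand 3 of the ISEL MachineInstr).
static unsigned expandedIselSemantics(bool CRBit, unsigned TrueVal,
                                      unsigned FalseVal) {
  unsigned Dest;
  if (CRBit) {
    // TrueBlock: emitted as  addi Dest, TrueVal, 0
    Dest = TrueVal;
  } else {
    // FalseBlock (or the original block): emitted as  ori Dest, FalseVal, 0
    Dest = FalseVal;
  }
  // Control continues in the successor block in both cases.
  return Dest;
}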
-bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) { - // Same Condition Register? - if (!useSameRegister(PrevPushedMI->getOperand(3), MI->getOperand(3))) - return false; - - MachineBasicBlock::iterator PrevPushedMBBI = *PrevPushedMI; - MachineBasicBlock::iterator MBBI = *MI; - return (std::prev(MBBI) == PrevPushedMBBI); // Contiguous ISELs? -} - -void PPCExpandISEL::expandAndMergeISELs() { - bool ExpandISELEnabled = isExpandISELEnabled(*MF); - - for (auto &BlockList : ISELInstructions) { - LLVM_DEBUG( - dbgs() << "Expanding ISEL instructions in " - << printMBBReference(*MF->getBlockNumbered(BlockList.first)) - << "\n"); - BlockISELList &CurrentISELList = BlockList.second; - auto I = CurrentISELList.begin(); - auto E = CurrentISELList.end(); - - while (I != E) { - assert(isISEL(**I) && "Expecting an ISEL instruction"); - MachineOperand &Dest = (*I)->getOperand(0); - MachineOperand &TrueValue = (*I)->getOperand(1); - MachineOperand &FalseValue = (*I)->getOperand(2); - - // Special case 1, all registers used by ISEL are the same one. - // The non-redundant isel 0, 0, 0, N would not satisfy these conditions - // as it would be ISEL %R0, %ZERO, %R0, %CRN. - if (useSameRegister(Dest, TrueValue) && - useSameRegister(Dest, FalseValue)) { - LLVM_DEBUG(dbgs() << "Remove redundant ISEL instruction: " << **I - << "\n"); - // FIXME: if the CR field used has no other uses, we could eliminate the - // instruction that defines it. This would have to be done manually - // since this pass runs too late to run DCE after it. - NumRemoved++; - (*I)->eraseFromParent(); - I++; - } else if (useSameRegister(TrueValue, FalseValue)) { - // Special case 2, the two input registers used by ISEL are the same. - // Note: the non-foldable isel RX, 0, 0, N would not satisfy this - // condition as it would be ISEL %RX, %ZERO, %R0, %CRN, which makes it - // safe to fold ISEL to MR(OR) instead of ADDI. - MachineBasicBlock *MBB = (*I)->getParent(); - LLVM_DEBUG( - dbgs() << "Fold the ISEL instruction to an unconditional copy:\n"); - LLVM_DEBUG(dbgs() << "ISEL: " << **I << "\n"); - NumFolded++; - // Note: we're using both the TrueValue and FalseValue operands so as - // not to lose the kill flag if it is set on either of them. - BuildMI(*MBB, (*I), dl, TII->get(isISEL8(**I) ? PPC::OR8 : PPC::OR)) - .add(Dest) - .add(TrueValue) - .add(FalseValue); - (*I)->eraseFromParent(); - I++; - } else if (ExpandISELEnabled) { // Normal cases expansion enabled - LLVM_DEBUG(dbgs() << "Expand ISEL instructions:\n"); - LLVM_DEBUG(dbgs() << "ISEL: " << **I << "\n"); - BlockISELList SubISELList; - SubISELList.push_back(*I++); - // Collect the ISELs that can be merged together. - // This will eat up ISEL instructions without considering whether they - // may be redundant or foldable to a register copy. So we still keep - // the handleSpecialCases() downstream to handle them. 
- while (I != E && canMerge(SubISELList.back(), *I)) { - LLVM_DEBUG(dbgs() << "ISEL: " << **I << "\n"); - SubISELList.push_back(*I++); - } - - expandMergeableISELs(SubISELList); - } else { // Normal cases expansion disabled - I++; // leave the ISEL as it is - } - } // end while - } // end for -} - -void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL, - MachineBasicBlock *MBB) { - IsTrueBlockRequired = false; - IsFalseBlockRequired = false; - - auto MI = BIL.begin(); - while (MI != BIL.end()) { - assert(isISEL(**MI) && "Expecting an ISEL instruction"); - LLVM_DEBUG(dbgs() << "ISEL: " << **MI << "\n"); - - MachineOperand &Dest = (*MI)->getOperand(0); - MachineOperand &TrueValue = (*MI)->getOperand(1); - MachineOperand &FalseValue = (*MI)->getOperand(2); - - // If at least one of the ISEL instructions satisfy the following - // condition, we need the True Block: - // The Dest Register and True Value Register are not the same - // Similarly, if at least one of the ISEL instructions satisfy the - // following condition, we need the False Block: - // The Dest Register and False Value Register are not the same. - bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); - bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); - - // Special case 1, all registers used by ISEL are the same one. - if (!IsADDIInstRequired && !IsORIInstRequired) { - LLVM_DEBUG(dbgs() << "Remove redundant ISEL instruction."); - // FIXME: if the CR field used has no other uses, we could eliminate the - // instruction that defines it. This would have to be done manually - // since this pass runs too late to run DCE after it. - NumRemoved++; - (*MI)->eraseFromParent(); - // Setting MI to the erase result keeps the iterator valid and increased. - MI = BIL.erase(MI); - continue; - } - - // Special case 2, the two input registers used by ISEL are the same. - // Note 1: We favor merging ISEL expansions over folding a single one. If - // the passed list has multiple merge-able ISEL's, we won't fold any. - // Note 2: There is no need to test for PPC::R0/PPC::X0 because PPC::ZERO/ - // PPC::ZERO8 will be used for the first operand if the value is meant to - // be zero. In this case, the useSameRegister method will return false, - // thereby preventing this ISEL from being folded. - if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) { - LLVM_DEBUG( - dbgs() << "Fold the ISEL instruction to an unconditional copy."); - NumFolded++; - // Note: we're using both the TrueValue and FalseValue operands so as - // not to lose the kill flag if it is set on either of them. - BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::OR8 : PPC::OR)) - .add(Dest) - .add(TrueValue) - .add(FalseValue); - (*MI)->eraseFromParent(); - // Setting MI to the erase result keeps the iterator valid and increased. - MI = BIL.erase(MI); - continue; - } - - IsTrueBlockRequired |= IsADDIInstRequired; - IsFalseBlockRequired |= IsORIInstRequired; - MI++; - } -} - -void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, - MachineBasicBlock *MBB) { - if (BIL.empty()) - return; - - assert((IsTrueBlockRequired || IsFalseBlockRequired) && - "Should have been handled by special cases earlier!"); - - MachineBasicBlock *Successor = nullptr; - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineBasicBlock::iterator MBBI = (*BIL.back()); - NewSuccessor = (MBBI != MBB->getLastNonDebugInstr() || !MBB->canFallThrough()) - // Another BB is needed to move the instructions that - // follow this ISEL. 
If the ISEL is the last instruction - // in a block that can't fall through, we also need a block - // to branch to. - ? MF->CreateMachineBasicBlock(LLVM_BB) - : nullptr; - - MachineFunction::iterator It = MBB->getIterator(); - ++It; // Point to the successor block of MBB. - - // If NewSuccessor is NULL then the last ISEL in this group is the last - // non-debug instruction in this block. Find the fall-through successor - // of this block to use when updating the CFG below. - if (!NewSuccessor) { - for (auto &Succ : MBB->successors()) { - if (MBB->isLayoutSuccessor(Succ)) { - Successor = Succ; - break; - } - } - } else - Successor = NewSuccessor; - - // The FalseBlock and TrueBlock are inserted after the MBB block but before - // its successor. - // Note this need to be done *after* the above setting the Successor code. - if (IsFalseBlockRequired) { - FalseBlock = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, FalseBlock); - } - - if (IsTrueBlockRequired) { - TrueBlock = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, TrueBlock); - } - - if (NewSuccessor) { - MF->insert(It, NewSuccessor); - - // Transfer the rest of this block into the new successor block. - NewSuccessor->splice(NewSuccessor->end(), MBB, - std::next(MachineBasicBlock::iterator(BIL.back())), - MBB->end()); - NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB); - - // Update the liveins for NewSuccessor. - LivePhysRegs LPR; - computeAndAddLiveIns(LPR, *NewSuccessor); - - } else { - // Remove successor from MBB. - MBB->removeSuccessor(Successor); - } - - // Note that this needs to be done *after* transfering the successors from MBB - // to the NewSuccessor block, otherwise these blocks will also be transferred - // as successors! - MBB->addSuccessor(IsTrueBlockRequired ? TrueBlock : Successor); - MBB->addSuccessor(IsFalseBlockRequired ? FalseBlock : Successor); - - if (IsTrueBlockRequired) { - TrueBlockI = TrueBlock->begin(); - TrueBlock->addSuccessor(Successor); - } - - if (IsFalseBlockRequired) { - FalseBlockI = FalseBlock->begin(); - FalseBlock->addSuccessor(Successor); - } - - // Conditional branch to the TrueBlock or Successor - BuildMI(*MBB, BIL.back(), dl, TII->get(PPC::BC)) - .add(BIL.back()->getOperand(3)) - .addMBB(IsTrueBlockRequired ? TrueBlock : Successor); - - // Jump over the true block to the new successor if the condition is false. - BuildMI(*(IsFalseBlockRequired ? FalseBlock : MBB), - (IsFalseBlockRequired ? FalseBlockI : BIL.back()), dl, - TII->get(PPC::B)) - .addMBB(Successor); - - if (IsFalseBlockRequired) - FalseBlockI = FalseBlock->begin(); // get the position of PPC::B -} - -void PPCExpandISEL::populateBlocks(BlockISELList &BIL) { - for (auto &MI : BIL) { - assert(isISEL(*MI) && "Expecting an ISEL instruction"); - - MachineOperand &Dest = MI->getOperand(0); // location to store to - MachineOperand &TrueValue = MI->getOperand(1); // Value to store if - // condition is true - MachineOperand &FalseValue = MI->getOperand(2); // Value to store if - // condition is false - - LLVM_DEBUG(dbgs() << "Dest: " << Dest << "\n"); - LLVM_DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n"); - LLVM_DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n"); - LLVM_DEBUG(dbgs() << "ConditionRegister: " << MI->getOperand(3) << "\n"); - - // If the Dest Register and True Value Register are not the same one, we - // need the True Block. 
- bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); - bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); - - // Copy the result into the destination if the condition is true. - if (IsADDIInstRequired) - BuildMI(*TrueBlock, TrueBlockI, dl, - TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI)) - .add(Dest) - .add(TrueValue) - .add(MachineOperand::CreateImm(0)); - - // Copy the result into the destination if the condition is false. - if (IsORIInstRequired) - BuildMI(*FalseBlock, FalseBlockI, dl, - TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI)) - .add(Dest) - .add(FalseValue) - .add(MachineOperand::CreateImm(0)); - - MI->eraseFromParent(); // Remove the ISEL instruction. - - NumExpanded++; - } - - if (IsTrueBlockRequired) { - // Update the liveins for TrueBlock. - LivePhysRegs LPR; - computeAndAddLiveIns(LPR, *TrueBlock); - } - - if (IsFalseBlockRequired) { - // Update the liveins for FalseBlock. - LivePhysRegs LPR; - computeAndAddLiveIns(LPR, *FalseBlock); - } -} - -void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) { - // At this stage all the ISELs of BIL are in the same MBB. - MachineBasicBlock *MBB = BIL.back()->getParent(); - - handleSpecialCases(BIL, MBB); - reorganizeBlockLayout(BIL, MBB); - populateBlocks(BIL); -} - -INITIALIZE_PASS(PPCExpandISEL, DEBUG_TYPE, "PowerPC Expand ISEL Generation", - false, false) -char PPCExpandISEL::ID = 0; - -FunctionPass *llvm::createPPCExpandISELPass() { return new PPCExpandISEL(); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 459a96eca1ff2..efabfa0b511a6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6904,7 +6904,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, while (NextReg != GPRs.size() && !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) { // Shadow allocate next registers since its aligment is not strict enough. - unsigned Reg = State.AllocateReg(GPRs); + MCRegister Reg = State.AllocateReg(GPRs); // Allocate the stack space shadowed by said register. State.AllocateStack(PtrSize, PtrAlign); assert(Reg && "Alocating register unexpectedly failed."); @@ -6915,7 +6915,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const unsigned StackSize = alignTo(ByValSize, ObjAlign); unsigned Offset = State.AllocateStack(StackSize, ObjAlign); for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) { - if (unsigned Reg = State.AllocateReg(GPRs)) + if (MCRegister Reg = State.AllocateReg(GPRs)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else { State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE, @@ -6942,7 +6942,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt : CCValAssign::LocInfo::ZExt; - if (unsigned Reg = State.AllocateReg(GPRs)) + if (MCRegister Reg = State.AllocateReg(GPRs)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo)); @@ -6957,13 +6957,13 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // This includes f64 in 64-bit mode for ABI compatibility. const unsigned Offset = State.AllocateStack(IsPPC64 ? 
8 : StoreSize, Align(4)); - unsigned FReg = State.AllocateReg(FPR); + MCRegister FReg = State.AllocateReg(FPR); if (FReg) State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo)); // Reserve and initialize GPRs or initialize the PSA as required. for (unsigned I = 0; I < StoreSize; I += PtrSize) { - if (unsigned Reg = State.AllocateReg(GPRs)) { + if (MCRegister Reg = State.AllocateReg(GPRs)) { assert(FReg && "An FPR should be available when a GPR is reserved."); if (State.isVarArg()) { // Successfully reserved GPRs are only initialized for vararg calls. @@ -7003,7 +7003,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (!State.isVarArg()) { // If there are vector registers remaining we don't consume any stack // space. - if (unsigned VReg = State.AllocateReg(VR)) { + if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); return false; } @@ -7021,7 +7021,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, while (NextRegIndex != GPRs.size() && !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) { // Shadow allocate register and its stack shadow. - unsigned Reg = State.AllocateReg(GPRs); + MCRegister Reg = State.AllocateReg(GPRs); State.AllocateStack(PtrSize, PtrAlign); assert(Reg && "Allocating register unexpectedly failed."); (void)Reg; @@ -7033,7 +7033,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // through ellipses) and shadow GPRs (unlike arguments to non-vaarg // functions) if (State.isFixed(ValNo)) { - if (unsigned VReg = State.AllocateReg(VR)) { + if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); // Shadow allocate GPRs and stack space even though we pass in a VR. for (unsigned I = 0; I != VecSize; I += PtrSize) @@ -7062,8 +7062,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, State.addLoc( CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - const unsigned FirstReg = State.AllocateReg(PPC::R9); - const unsigned SecondReg = State.AllocateReg(PPC::R10); + const MCRegister FirstReg = State.AllocateReg(PPC::R9); + const MCRegister SecondReg = State.AllocateReg(PPC::R10); assert(FirstReg && SecondReg && "Allocating R9 or R10 unexpectedly failed."); State.addLoc( @@ -7080,7 +7080,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, State.addLoc( CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); for (unsigned I = 0; I != VecSize; I += PtrSize) { - const unsigned Reg = State.AllocateReg(GPRs); + const MCRegister Reg = State.AllocateReg(GPRs); assert(Reg && "Failed to allocated register for vararg vector argument"); State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 81f16eb1a905b..48833e8f88066 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1678,7 +1678,8 @@ static unsigned getCRBitValue(unsigned CRBit) { void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { // We can end up with self copies and similar things as a result of VSX copy // legalization. Promote them here. 
const TargetRegisterInfo *TRI = &getRegisterInfo(); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 1e2687f92c61e..40996f6fbb75e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -454,7 +454,8 @@ class PPCInstrInfo : public PPCGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 6ce345dd44138..7d0455942923d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -141,7 +141,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { initializePPCBSelPass(PR); initializePPCBranchCoalescingPass(PR); initializePPCBoolRetToIntPass(PR); - initializePPCExpandISELPass(PR); initializePPCPreEmitPeepholePass(PR); initializePPCTLSDynamicCallPass(PR); initializePPCMIPeepholePass(PR); @@ -600,7 +599,6 @@ void PPCPassConfig::addPreSched2() { void PPCPassConfig::addPreEmitPass() { addPass(createPPCPreEmitPeepholePass()); - addPass(createPPCExpandISELPass()); if (getOptLevel() != CodeGenOptLevel::None) addPass(createPPCEarlyReturnPass()); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 670dee2edb1df..790107b772fcb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1285,8 +1285,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (VT.getVectorElementType() == MVT::bf16) { setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); - // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + if (Subtarget.hasStdExtZfbfmin()) { + // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + } else { + // We need to custom legalize bf16 build vectors if Zfbfmin isn't + // available. + setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom); + } setOperationAction( {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, Custom); @@ -3935,9 +3941,9 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, MVT VT = Op.getSimpleValueType(); assert(VT.isFixedLengthVector() && "Unexpected vector!"); - // If we don't have scalar f16, we need to bitcast to an i16 vector. - if (VT.getVectorElementType() == MVT::f16 && - !Subtarget.hasStdExtZfhmin()) + // If we don't have scalar f16/bf16, we need to bitcast to an i16 vector. 
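The bf16 handling added here reuses the existing f16 fallback: when there is no scalar bf16 support (no Zfbfmin), a fixed-length bf16 build_vector is routed through lowerBUILD_VECTORvXf16, which, per the comment below, works on the elements' 16-bit integer bit patterns instead. The following is only a rough sketch of that idea with an invented helper name, not the actual RISC-V implementation.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: without scalar bf16 ops, reinterpret each bf16 element as its
// i16 bit pattern, build an i16 vector, and bitcast the result back to
// the original bf16 vector type.
static SDValue lowerBF16BuildVectorViaI16(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();                // e.g. v4bf16
  MVT IVT = VT.changeVectorElementType(MVT::i16);  // e.g. v4i16
  SmallVector<SDValue, 8> IntElts;
  for (const SDValue &Elt : Op->ops())
    IntElts.push_back(DAG.getBitcast(MVT::i16, Elt)); // bf16 -> i16 bits
  SDValue IntVec = DAG.getBuildVector(IVT, DL, IntElts);
  return DAG.getBitcast(VT, IntVec);               // reinterpret as bf16
}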
+ if ((VT.getVectorElementType() == MVT::f16 && !Subtarget.hasStdExtZfhmin()) || + (VT.getVectorElementType() == MVT::bf16 && !Subtarget.hasStdExtZfbfmin())) return lowerBUILD_VECTORvXf16(Op, DAG); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || @@ -6883,10 +6889,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: - return lowerToScalableOp(Op, DAG); case ISD::UADDSAT: case ISD::USUBSAT: - return lowerToScalableOp(Op, DAG); case ISD::SADDSAT: case ISD::SSUBSAT: return lowerToScalableOp(Op, DAG); @@ -18625,7 +18629,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // Static chain parameter must not be passed in normal argument registers, // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain if (ArgFlags.isNest()) { - if (unsigned Reg = State.AllocateReg(RISCV::X7)) { + if (MCRegister Reg = State.AllocateReg(RISCV::X7)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19098,7 +19102,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher) { if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19106,14 +19110,13 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, const RISCVSubtarget &Subtarget = TLI.getSubtarget(); - if (LocVT == MVT::f16 && - (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { + if (LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) { static const MCPhysReg FPR16List[] = { RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; - if (unsigned Reg = State.AllocateReg(FPR16List)) { + if (MCRegister Reg = State.AllocateReg(FPR16List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19125,7 +19128,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19137,19 +19140,20 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } } // Check if there is an available GPR before hitting the stack. 
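Dropping the explicit `hasStdExtZfh()` check works only if enabling Zfh also implies the Zfhmin feature, so that querying the weaker feature covers both. A toy sketch of such a feature-implication query, with made-up names standing in for the subtarget feature graph:

#include <cstdio>
#include <set>
#include <string>

// Enabling a feature also enables whatever it implies, so checking the
// weaker feature is sufficient (names are labels, not a real ISA model).
struct ToySubtarget {
  std::set<std::string> Features;
  void enable(const std::string &F) {
    Features.insert(F);
    if (F == "zfh")
      Features.insert("zfhmin"); // zfh provides everything zfhmin does
  }
  bool has(const std::string &F) const { return Features.count(F) != 0; }
};

int main() {
  ToySubtarget ST;
  ST.enable("zfh");
  std::printf("hasZfhmin = %d\n", (int)ST.has("zfhmin")); // prints 1
}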
- if ((LocVT == MVT::f16 && - (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || + if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) || (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) { - if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + LocInfo = CCValAssign::BCvt; + LocVT = Subtarget.getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19184,7 +19188,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. - if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { LocInfo = CCValAssign::Indirect; LocVT = TLI.getSubtarget().getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); @@ -19222,7 +19226,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, if (LocVT == MVT::i32 || LocVT == MVT::i64) { // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19237,7 +19241,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, RISCV::F18_F, RISCV::F19_F, RISCV::F20_F, RISCV::F21_F}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19249,7 +19253,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, RISCV::F24_D, RISCV::F25_D, RISCV::F26_D, RISCV::F27_D}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19258,7 +19262,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 9dd79027d7a16..77072edab4d13 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -440,12 +440,14 @@ void RISCVInstrInfo::copyPhysRegVector( void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { const TargetRegisterInfo *TRI = STI.getRegisterInfo(); if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) { BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) - .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(SrcReg, + getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc)) 
.addImm(0); return; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index ecb7982b3e5e3..f25e5ee42a737 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -84,7 +84,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { const TargetRegisterClass *RegClass) const; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 1af873f85d03c..5ec07b2a0aa8f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -447,21 +447,30 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF); InstructionCost Cost = 0; - for (unsigned I = 0; I < NumRegs; ++I) { + for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF); + I < NumSrcRegs; ++I) { bool IsSingleVector = true; SmallVector SubMask(SubVF, PoisonMaskElem); - transform(Mask.slice(I * SubVF, - I == NumRegs - 1 ? Mask.size() % SubVF : SubVF), - SubMask.begin(), [&](int I) { - bool SingleSubVector = I / VF == 0; - IsSingleVector &= SingleSubVector; - return (SingleSubVector ? 0 : 1) * SubVF + I % VF; - }); + transform( + Mask.slice(I * SubVF, + I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF), + SubMask.begin(), [&](int I) -> int { + if (I == PoisonMaskElem) + return PoisonMaskElem; + bool SingleSubVector = I / VF == 0; + IsSingleVector &= SingleSubVector; + return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF; + }); + if (all_of(enumerate(SubMask), [](auto &&P) { + return P.value() == PoisonMaskElem || + static_cast(P.value()) == P.index(); + })) + continue; Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, SubVecTy, SubMask, CostKind, 0, nullptr); - return Cost; } + return Cost; } break; } diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 822ab492c710b..34e5d9224f715 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -131,6 +131,9 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { case RISCV::VMV_V_V: SrcIdx = 2; break; + case RISCV::VMERGE_VVM: + SrcIdx = 3; // TODO: We can also handle the false operand. 
+ break; } MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())); diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 5c057a79afa0c..dd97263316505 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -1500,7 +1500,7 @@ foreach i = ["", "2", "3", "4", "8", "16"] in { defm : DemangledVectorLoadStoreBuiltin<"vload_half", 2, 2, 173>; defm : DemangledVectorLoadStoreBuiltin<"vstore_half", 3, 3, 175>; } else { - defm : DemangledVectorLoadStoreBuiltin; + defm : DemangledVectorLoadStoreBuiltin; defm : DemangledVectorLoadStoreBuiltin; } defm : DemangledVectorLoadStoreBuiltin; diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index d9864ab50ecfe..4175f766ac69a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/TypedPointerType.h" #include +#include // This pass performs the following transformation on LLVM IR level required // for the following translation to SPIR-V: diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index dac7640cdddd6..e8a6b4fdbae97 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -241,7 +241,8 @@ unsigned SPIRVInstrInfo::insertBranch( void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { // Actually we don't need this COPY instruction. However if we do nothing with // it, post RA pseudo instrs expansion just removes it and we get the code // with undef registers. 
Therefore, we need to replace all uses of dst with diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h index 95f3874913572..67d2d979cb5a1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h @@ -51,7 +51,8 @@ class SPIRVInstrInfo : public SPIRVGenInstrInfo { int *BytesAdded = nullptr) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; }; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 2727a9f2efbb1..0bb2540a97d72 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -438,7 +438,8 @@ bool SparcInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { unsigned numSubRegs = 0; unsigned movOpc = 0; const unsigned *subRegIdx = nullptr; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h index a7bb34c6c8e77..fc04542c819d4 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ -87,7 +87,8 @@ class SparcInstrInfo : public SparcGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index d0758891fe570..91db858f5cdaf 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -857,7 +857,9 @@ bool SystemZInstrInfo::PredicateInstruction( void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, + bool RenamableSrc) const { // Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the // super register in case one of the subregs is undefined. // This handles ADDR128 too. 
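The common thread in these target changes is that `copyPhysReg` grows two defaulted boolean parameters: the out-of-line definitions spell them out, the header overrides default them, and existing call sites that never pass the new flags keep compiling. A standalone sketch of that pattern with a made-up `TargetCopyEmitter` interface (not the real TargetInstrInfo):

#include <cstdio>

// Base hook gains two defaulted flags; existing callers stay unchanged.
struct TargetCopyEmitter {
  virtual ~TargetCopyEmitter() = default;
  virtual void copyPhysReg(unsigned DestReg, unsigned SrcReg, bool KillSrc,
                           bool RenamableDest = false,
                           bool RenamableSrc = false) const = 0;
};

// Each target override must repeat the new parameters; default arguments are
// taken from the static type a call is made through (here, the base class).
struct ToyTarget : TargetCopyEmitter {
  void copyPhysReg(unsigned DestReg, unsigned SrcReg, bool KillSrc,
                   bool RenamableDest = false,
                   bool RenamableSrc = false) const override {
    std::printf("copy r%u <- r%u kill=%d renamable(dst=%d,src=%d)\n", DestReg,
                SrcReg, (int)KillSrc, (int)RenamableDest, (int)RenamableSrc);
  }
};

int main() {
  ToyTarget TII;
  const TargetCopyEmitter &Base = TII;
  Base.copyPhysReg(1, 2, /*KillSrc=*/true); // old-style call still compiles
  Base.copyPhysReg(3, 4, false, /*RenamableDest=*/true, /*RenamableSrc=*/true);
}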
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 61338b0816155..cc8a4ccd234cd 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -276,7 +276,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { ArrayRef Pred) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index c001dc4d92b9a..fccbed3bdec8b 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -359,7 +359,8 @@ static void copyPhysSubRegs(MachineBasicBlock &MBB, void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const { + bool KillSrc, bool RenamableDest, + bool RenamableSrc) const { if (IsAliasOfSX(SrcReg) && IsAliasOfSX(DestReg)) { BuildMI(MBB, I, DL, get(VE::ORri), DestReg) diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h index 4fcc479a13d57..3a9718f2f2603 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -81,7 +81,8 @@ class VEInstrInfo : public VEGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; /// Stack Spill & Reload { Register isLoadFromStackSlot(const MachineInstr &MI, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 32a4accd040eb..75011ab3c8721 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -57,7 +57,9 @@ bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, + bool RenamableSrc) const { // This method is called by post-RA expansion, which expects only pregs to // exist. However we need to handle both here. 
auto &MRI = MBB.getParent()->getRegInfo(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index c1e1a790c60e2..8cb692f9bc0c4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -47,7 +47,8 @@ class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index eda3c9fd50bf5..864b7d8e769ab 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1956,7 +1956,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (DotOffset != StringRef::npos) { consumeToken(); StringRef LHS = Identifier.slice(0, DotOffset); - StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1); + StringRef Dot = Identifier.substr(DotOffset, 1); StringRef RHS = Identifier.substr(DotOffset + 1); if (!RHS.empty()) { getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS)); diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp index 0ea51bec29b81..154cb1399880b 100644 --- a/llvm/lib/Target/X86/X86CallingConv.cpp +++ b/llvm/lib/Target/X86/X86CallingConv.cpp @@ -51,7 +51,7 @@ static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, for (unsigned I = 0; I < RequiredGprsUponSplit; I++) { // Marking the register as located. - unsigned Reg = State.AllocateReg(AvailableRegs[I]); + MCRegister Reg = State.AllocateReg(AvailableRegs[I]); // Since we previously made sure that 2 registers are available // we expect that a real register number will be returned. @@ -102,7 +102,7 @@ static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT, for (auto Reg : RegList) { // If the register is not marked as allocated - assign to it. if (!State.isAllocated(Reg)) { - unsigned AssigedReg = State.AllocateReg(Reg); + MCRegister AssigedReg = State.AllocateReg(Reg); assert(AssigedReg == Reg && "Expecting a valid register allocation"); State.addLoc( CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo)); @@ -158,7 +158,7 @@ static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs()); // Assign XMM register - (shadow for HVA and non-shadow for non HVA). - if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { + if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { // In Vectorcall Calling convention, additional shadow stack can be // created on top of the basic 32 bytes of win64. // It can happen if the fifth or sixth argument is vector type or HVA. @@ -209,7 +209,7 @@ static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; // If this is an HVA - Stop the search. // Assign XMM register. 
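The X86AsmParser change above swaps a half-open `slice(DotOffset, DotOffset + 1)` for `substr(DotOffset, 1)`; both extract the single '.' character. A standalone check of that equivalence, with a small `slice` helper and std::string_view standing in for StringRef:

#include <cstdio>
#include <string_view>

// Stand-in for a half-open [Start, End) slice.
static std::string_view slice(std::string_view S, size_t Start, size_t End) {
  return S.substr(Start, End - Start);
}

int main() {
  std::string_view Identifier = "struct.field";
  size_t DotOffset = Identifier.find('.');

  std::string_view Old = slice(Identifier, DotOffset, DotOffset + 1);
  std::string_view New = Identifier.substr(DotOffset, 1);
  std::printf("old='%.*s' new='%.*s' equal=%d\n", (int)Old.size(), Old.data(),
              (int)New.size(), New.data(), (int)(Old == New));
}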
- if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { + if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; } @@ -259,7 +259,7 @@ static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, // If there are no pending members, we are not in the middle of a split, // so do the usual inreg stuff. if (PendingMembers.empty()) { - if (unsigned Reg = State.AllocateReg(RegList)) { + if (MCRegister Reg = State.AllocateReg(RegList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 39ba7ea777909..30428b9c3dcdd 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4293,7 +4293,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { // First deal with the normal symmetric copies. bool HasAVX = Subtarget.hasAVX(); bool HasVLX = Subtarget.hasVLX(); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 3100a9e5699f0..1c6362f911e4a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -418,7 +418,8 @@ class X86InstrInfo final : public X86GenInstrInfo { Register FalseReg) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index ae2e0fec3f899..90a195e928a59 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -331,7 +331,8 @@ XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { bool GRDest = XCore::GRRegsRegClass.contains(DestReg); bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg); diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h index 1dafb6ea7d211..7f330539dd76a 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.h +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h @@ -64,7 +64,8 @@ class XCoreInstrInfo : public XCoreGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index c7675c2f50176..0d2ce26a942e0 100644 --- 
a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -70,11 +70,12 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); - // No sign extend instructions for i1 + // No sign extend instructions for i1 and sign extend load i8 for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } setOperationAction(ISD::ConstantPool, PtrVT, Custom); diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp index 491defb867643..2263aadcb0dd3 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp @@ -105,7 +105,8 @@ void XtensaInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) const { + MCRegister SrcReg, bool KillSrc, + bool RenamableDest, bool RenamableSrc) const { // The MOV instruction is not present in core ISA, // so use OR instruction. if (Xtensa::ARRegClass.contains(DestReg, SrcReg)) diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h index 37f157f832464..8bf3f0618f285 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.h @@ -49,7 +49,8 @@ class XtensaInstrInfo : public XtensaGenInstrInfo { void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, - bool KillSrc) const override; + bool KillSrc, bool RenamableDest = false, + bool RenamableSrc = false) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 74a71cbf101b5..dd01d143b066b 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -352,6 +352,13 @@ void FunctionImporter::ImportMapTy::maybeAddDeclaration( ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Declaration); } +SmallVector +FunctionImporter::ImportMapTy::getSourceModules() const { + SmallVector Modules(make_first_range(ImportMap)); + llvm::sort(Modules); + return Modules; +} + /// Import globals referenced by a function or other globals that are being /// imported, if importing such global is possible. 
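The new `getSourceModules()` above copies the import map's keys and sorts them, giving the import loop a deterministic module order without a separate ordered container. A standalone sketch of that idea using std::unordered_map and std::sort (toy key/value types, not the importer's real containers):

#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Import list keyed by source module; unordered_map iteration order is
  // unspecified, so the keys are copied out and sorted before use.
  std::unordered_map<std::string, int> ImportMap = {
      {"libB.bc", 3}, {"libA.bc", 5}, {"libC.bc", 1}};

  std::vector<std::string> Modules;
  Modules.reserve(ImportMap.size());
  for (const auto &KV : ImportMap)
    Modules.push_back(KV.first);
  std::sort(Modules.begin(), Modules.end());

  for (const std::string &Name : Modules) // libA.bc, libB.bc, libC.bc
    std::printf("importing from %s\n", Name.c_str());
}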
class GlobalsImporter final { @@ -1770,11 +1777,6 @@ Expected FunctionImporter::importFunctions( unsigned ImportedCount = 0, ImportedGVCount = 0; IRMover Mover(DestModule); - // Do the actual import of functions now, one Module at a time - std::set ModuleNameOrderedList; - for (const auto &FunctionsToImportPerModule : ImportList.getImportMap()) { - ModuleNameOrderedList.insert(FunctionsToImportPerModule.first); - } auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType, GlobalValue::GUID GUID) @@ -1785,7 +1787,8 @@ Expected FunctionImporter::importFunctions( return Iter->second; }; - for (const auto &Name : ModuleNameOrderedList) { + // Do the actual import of functions now, one Module at a time + for (const auto &Name : ImportList.getSourceModules()) { // Get the module for the import const auto &FunctionsToImportPerModule = ImportList.getImportMap().find(Name); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index aacfe39f16fbc..8dd0cfdb2ae0a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1658,11 +1658,17 @@ void PGOUseFunc::setBranchWeights() { continue; // We have a non-zero Branch BB. - unsigned Size = BBCountInfo.OutEdges.size(); - SmallVector EdgeCounts(Size, 0); + + // SuccessorCount can be greater than OutEdgesCount, because + // removed edges don't appear in OutEdges. + unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); + unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); + assert(OutEdgesCount <= SuccessorCount); + + SmallVector EdgeCounts(SuccessorCount, 0); uint64_t MaxCount = 0; - for (unsigned s = 0; s < Size; s++) { - const PGOUseEdge *E = BBCountInfo.OutEdges[s]; + for (unsigned It = 0; It < OutEdgesCount; It++) { + const PGOUseEdge *E = BBCountInfo.OutEdges[It]; const BasicBlock *SrcBB = E->SrcBB; const BasicBlock *DestBB = E->DestBB; if (DestBB == nullptr) diff --git a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp index 7854cf4f2c625..1707ec298d6d2 100644 --- a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp @@ -19,6 +19,8 @@ #include "llvm/Transforms/Instrumentation/RealtimeSanitizer.h" +#include + using namespace llvm; static void insertCallBeforeInstruction(Function &Fn, Instruction &Instruction, @@ -51,6 +53,7 @@ RealtimeSanitizerPass::RealtimeSanitizerPass( PreservedAnalyses RealtimeSanitizerPass::run(Function &F, AnalysisManager &AM) { if (F.hasFnAttribute(Attribute::SanitizeRealtime)) { + assert(!F.hasFnAttribute(Attribute::NoSanitizeRealtime)); insertCallAtFunctionEntryPoint(F, "__rtsan_realtime_enter"); insertCallAtAllFunctionExitPoints(F, "__rtsan_realtime_exit"); @@ -59,5 +62,15 @@ PreservedAnalyses RealtimeSanitizerPass::run(Function &F, return PA; } + if (F.hasFnAttribute(Attribute::NoSanitizeRealtime)) { + assert(!F.hasFnAttribute(Attribute::SanitizeRealtime)); + insertCallAtFunctionEntryPoint(F, "__rtsan_off"); + insertCallAtAllFunctionExitPoints(F, "__rtsan_on"); + + PreservedAnalyses PA; + PA.preserveSet(); + return PA; + } + return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 2b99e28acb4e9..ef6bbd37295ca 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ 
b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1654,7 +1654,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // be replacing a terminator. IRBuilder<> Builder(Call); - ArrayRef GCArgs(LiveVariables); + ArrayRef GCLive(LiveVariables); uint64_t StatepointID = StatepointDirectives::DefaultStatepointID; uint32_t NumPatchBytes = 0; uint32_t Flags = uint32_t(StatepointFlags::None); @@ -1827,7 +1827,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ if (auto *CI = dyn_cast(Call)) { CallInst *SPCall = Builder.CreateGCStatepointCall( StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs, - TransitionArgs, DeoptArgs, GCArgs, "safepoint_token"); + TransitionArgs, DeoptArgs, GCLive, "safepoint_token"); SPCall->setTailCallKind(CI->getTailCallKind()); SPCall->setCallingConv(CI->getCallingConv()); @@ -1852,8 +1852,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // original block. InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke( StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(), - II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs, - "statepoint_token"); + II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, + GCLive, "statepoint_token"); SPInvoke->setCallingConv(II->getCallingConv()); @@ -2839,7 +2839,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // That Value* no longer exists and we need to use the new gc_result. // Thankfully, the live set is embedded in the statepoint (and updated), so // we just grab that. - llvm::append_range(Live, Info.StatepointToken->gc_args()); + llvm::append_range(Live, Info.StatepointToken->gc_live()); #ifndef NDEBUG // Do some basic validation checking on our liveness results before // performing relocation. Relocation can and will turn mistakes in liveness @@ -2847,7 +2847,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // TODO: It would be nice to test consistency as well assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) && "statepoint must be reachable or liveness is meaningless"); - for (Value *V : Info.StatepointToken->gc_args()) { + for (Value *V : Info.StatepointToken->gc_live()) { if (!isa(V)) // Non-instruction values trivial dominate all possible uses continue; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 81d3243c887fc..cf00299812bb7 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -632,14 +632,17 @@ bool CodeExtractor::isEligible() const { } void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &SinkCands) const { + const ValueSet &SinkCands, + bool CollectGlobalInputs) const { for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. 
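The `findInputsOutputs` change above adds a `CollectGlobalInputs` mode: values defined outside the region are inputs, values used after the region are outputs, and with the flag set, globals referenced inside the region are collected as inputs as well. A toy sketch of that classification, with string names standing in for Values and hand-written sets standing in for the region analysis:

#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct ToyInst {
  std::string Def;               // value this instruction defines
  std::vector<std::string> Uses; // values it reads
};

int main() {
  // Region = both instructions; %a comes from the caller, %g is a global,
  // and %t is read again after the region.
  std::vector<ToyInst> Region = {{"%t", {"%a", "%g"}}, {"%u", {"%t"}}};
  std::set<std::string> DefinedInRegion = {"%t", "%u"};
  std::set<std::string> Globals = {"%g"};
  std::set<std::string> UsedAfterRegion = {"%t"};
  bool CollectGlobalInputs = true;

  std::set<std::string> Inputs, Outputs;
  for (const ToyInst &I : Region) {
    for (const std::string &V : I.Uses)
      if (!DefinedInRegion.count(V) ||
          (CollectGlobalInputs && Globals.count(V)))
        Inputs.insert(V); // defined outside the region (or a global) -> input
    if (UsedAfterRegion.count(I.Def))
      Outputs.insert(I.Def); // defined inside but used outside -> output
  }
  for (const std::string &V : Inputs)
    std::printf("input  %s\n", V.c_str());
  for (const std::string &V : Outputs)
    std::printf("output %s\n", V.c_str());
}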
for (Instruction &II : *BB) { for (auto &OI : II.operands()) { Value *V = OI; - if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + if (!SinkCands.count(V) && + (definedInCaller(Blocks, V) || + (CollectGlobalInputs && llvm::isa(V)))) Inputs.insert(V); } @@ -934,6 +937,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoUnwind: case Attribute::NoSanitizeBounds: case Attribute::NoSanitizeCoverage: + case Attribute::NoSanitizeRealtime: case Attribute::NullPointerIsValid: case Attribute::OptimizeForDebugging: case Attribute::OptForFuzzing: diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 670d88ac7cf8f..982b1041c7c51 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -446,6 +446,12 @@ class SCCPInstVisitor : public InstVisitor { return markConstant(ValueState[V], V, C); } + bool markNotConstant(ValueLatticeElement &IV, Value *V, Constant *C); + + bool markNotNull(ValueLatticeElement &IV, Value *V) { + return markNotConstant(IV, V, Constant::getNullValue(V->getType())); + } + /// markConstantRange - Mark the object as constant range with \p CR. If the /// object is not a constant range with the range \p CR, add it to the /// instruction work list so that the users of the instruction are updated @@ -667,6 +673,7 @@ class SCCPInstVisitor : public InstVisitor { void visitStoreInst(StoreInst &I); void visitLoadInst(LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); + void visitAllocaInst(AllocaInst &AI); void visitInvokeInst(InvokeInst &II) { visitCallBase(II); @@ -820,7 +827,11 @@ class SCCPInstVisitor : public InstVisitor { return; } } - // Assume nothing about the incoming arguments without range. + if (A->hasNonNullAttr()) { + markNotNull(ValueState[A], A); + return; + } + // Assume nothing about the incoming arguments without attributes. markOverdefined(A); } @@ -905,6 +916,15 @@ bool SCCPInstVisitor::markConstant(ValueLatticeElement &IV, Value *V, return true; } +bool SCCPInstVisitor::markNotConstant(ValueLatticeElement &IV, Value *V, + Constant *C) { + if (!IV.markNotConstant(C)) + return false; + LLVM_DEBUG(dbgs() << "markNotConstant: " << *C << ": " << *V << '\n'); + pushToWorkList(IV, V); + return true; +} + bool SCCPInstVisitor::markConstantRange(ValueLatticeElement &IV, Value *V, const ConstantRange &CR) { if (!IV.markConstantRange(CR)) @@ -1364,7 +1384,7 @@ void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) { // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(ValueState[&IVI])) + if (ValueState[&IVI].isOverdefined()) return (void)markOverdefined(&IVI); // If this has more than one index, we can't handle it, drive all results to @@ -1436,7 +1456,7 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) { ValueLatticeElement &IV = ValueState[&I]; // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(IV)) + if (IV.isOverdefined()) return (void)markOverdefined(&I); // If something is unknown/undef, wait for it to resolve. @@ -1461,7 +1481,7 @@ void SCCPInstVisitor::visitFreezeInst(FreezeInst &I) { ValueLatticeElement &IV = ValueState[&I]; // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. 
- if (SCCPSolver::isOverdefined(IV)) + if (IV.isOverdefined()) return (void)markOverdefined(&I); // If something is unknown/undef, wait for it to resolve. @@ -1541,7 +1561,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { void SCCPInstVisitor::visitCmpInst(CmpInst &I) { // Do not cache this lookup, getValueState calls later in the function might // invalidate the reference. - if (SCCPSolver::isOverdefined(ValueState[&I])) + if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); Value *Op1 = I.getOperand(0); @@ -1571,9 +1591,22 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { // Handle getelementptr instructions. If all operands are constants then we // can turn this into a getelementptr ConstantExpr. void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { - if (SCCPSolver::isOverdefined(ValueState[&I])) + if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); + const ValueLatticeElement &PtrState = getValueState(I.getPointerOperand()); + if (PtrState.isUnknownOrUndef()) + return; + + // gep inbounds/nuw of non-null is non-null. + if (PtrState.isNotConstant() && PtrState.getNotConstant()->isNullValue()) { + if (I.hasNoUnsignedWrap() || + (I.isInBounds() && + !NullPointerIsDefined(I.getFunction(), I.getAddressSpace()))) + return (void)markNotNull(ValueState[&I], &I); + return (void)markOverdefined(&I); + } + SmallVector Operands; Operands.reserve(I.getNumOperands()); @@ -1582,9 +1615,6 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { if (State.isUnknownOrUndef()) return; // Operands are not resolved yet. - if (SCCPSolver::isOverdefined(State)) - return (void)markOverdefined(&I); - if (Constant *C = getConstant(State, I.getOperand(i)->getType())) { Operands.push_back(C); continue; @@ -1597,6 +1627,13 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { markConstant(&I, C); } +void SCCPInstVisitor::visitAllocaInst(AllocaInst &I) { + if (!NullPointerIsDefined(I.getFunction(), I.getAddressSpace())) + return (void)markNotNull(ValueState[&I], &I); + + markOverdefined(&I); +} + void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { // If this store is of a struct, ignore it. 
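The SCCP additions above start tracking pointers that are known not to be null: an alloca is non-null whenever the null address is not a defined location, and a nuw GEP, or an inbounds GEP with null undefined, of a non-null base stays non-null. A standalone sketch of that three-state mini-lattice and its transfer functions, independent of the real ValueLatticeElement:

#include <cstdio>

// Tiny pointer lattice: Unknown, then NotNull, then Overdefined.
enum class PtrState { Unknown, NotNull, Overdefined };

static const char *name(PtrState S) {
  switch (S) {
  case PtrState::Unknown:
    return "unknown";
  case PtrState::NotNull:
    return "not-null";
  case PtrState::Overdefined:
    return "overdefined";
  }
  return "?";
}

// alloca: non-null whenever the null address is not a defined location.
static PtrState visitAlloca(bool NullPointerIsDefined) {
  return NullPointerIsDefined ? PtrState::Overdefined : PtrState::NotNull;
}

// gep: nuw, or inbounds with null undefined, preserves non-nullness of the
// base; otherwise the result degrades to overdefined.
static PtrState visitGep(PtrState Base, bool InBounds, bool NUW,
                         bool NullPointerIsDefined) {
  if (Base == PtrState::Unknown)
    return PtrState::Unknown; // operands not resolved yet
  if (Base == PtrState::NotNull &&
      (NUW || (InBounds && !NullPointerIsDefined)))
    return PtrState::NotNull;
  return PtrState::Overdefined;
}

int main() {
  PtrState A = visitAlloca(/*NullPointerIsDefined=*/false);
  PtrState G = visitGep(A, /*InBounds=*/true, /*NUW=*/false,
                        /*NullPointerIsDefined=*/false);
  std::printf("alloca: %s, gep: %s\n", name(A), name(G));
}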
if (SI.getOperand(0)->getType()->isStructTy()) @@ -1618,18 +1655,23 @@ void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { } static ValueLatticeElement getValueFromMetadata(const Instruction *I) { - if (I->getType()->isIntOrIntVectorTy()) { - if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) - return ValueLatticeElement::getRange( - getConstantRangeFromMetadata(*Ranges)); - - if (const auto *CB = dyn_cast(I)) + if (const auto *CB = dyn_cast(I)) { + if (CB->getType()->isIntOrIntVectorTy()) if (std::optional Range = CB->getRange()) return ValueLatticeElement::getRange(*Range); + if (CB->getType()->isPointerTy() && CB->isReturnNonNull()) + return ValueLatticeElement::getNot( + ConstantPointerNull::get(cast(I->getType()))); } + + if (I->getType()->isIntOrIntVectorTy()) + if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) + return ValueLatticeElement::getRange( + getConstantRangeFromMetadata(*Ranges)); if (I->hasMetadata(LLVMContext::MD_nonnull)) return ValueLatticeElement::getNot( ConstantPointerNull::get(cast(I->getType()))); + return ValueLatticeElement::getOverdefined(); } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index fb2efe581ac6b..1e6dc88ed9353 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1454,10 +1454,12 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { if (NonContRanges > 2) return nullptr; + // Slice off the character's high end bits. + CharVal = B.CreateTrunc(CharVal, B.getInt8Ty()); + SmallVector CharCompares; for (unsigned char C : SortedStr) - CharCompares.push_back( - B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C))); + CharCompares.push_back(B.CreateICmpEQ(CharVal, B.getInt8(C))); return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType()); } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index def73e8d0c0db..f471d2b1561d8 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4991,9 +4991,9 @@ static bool clusterSortPtrAccesses(ArrayRef VL, Type *ElemTy, auto *Mid = std::stable_partition( Begin, End, [&Root](auto V) { return std::get<2>(V) == Root; }); DenseMap> LessThan; - for (auto I = Begin; I < Mid; ++I) + for (auto *I = Begin; I < Mid; ++I) LessThan.try_emplace(std::get<1>(*I)); - for (auto I = Begin; I < Mid; ++I) { + for (auto *I = Begin; I < Mid; ++I) { Value *V = std::get<1>(*I); while (auto *Gep = dyn_cast(V)) { V = Gep->getOperand(0); @@ -9455,7 +9455,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto It = MinBWs.find(E); Type *OrigScalarTy = ScalarTy; if (It != MinBWs.end()) { - auto VecTy = dyn_cast(ScalarTy); + auto *VecTy = dyn_cast(ScalarTy); ScalarTy = IntegerType::get(F->getContext(), It->second.first); if (VecTy) ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements()); @@ -13133,7 +13133,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { ScalarTy = IE->getOperand(1)->getType(); auto It = MinBWs.find(E); if (It != MinBWs.end()) { - auto VecTy = dyn_cast(ScalarTy); + auto *VecTy = dyn_cast(ScalarTy); ScalarTy = IntegerType::get(F->getContext(), It->second.first); if (VecTy) ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements()); @@ -13765,6 +13765,27 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 
<< ".\n"); return E->VectorizedValue; } + if (isa(ScalarTy)) { + assert(SLPReVec && "FixedVectorType is not expected."); + // CreateMaskedGather expects VecTy and VecPtr have same size. We need + // to expand VecPtr if ScalarTy is a vector type. + unsigned ScalarTyNumElements = + cast(ScalarTy)->getNumElements(); + unsigned VecTyNumElements = + cast(VecTy)->getNumElements(); + assert(VecTyNumElements % ScalarTyNumElements == 0 && + "Cannot expand getelementptr."); + unsigned VF = VecTyNumElements / ScalarTyNumElements; + SmallVector Indices(VecTyNumElements); + transform(seq(VecTyNumElements), Indices.begin(), [=](unsigned I) { + return Builder.getInt64(I % ScalarTyNumElements); + }); + VecPtr = Builder.CreateGEP( + VecTy->getElementType(), + Builder.CreateShuffleVector( + VecPtr, createReplicatedMask(ScalarTyNumElements, VF)), + ConstantVector::get(Indices)); + } // Use the minimum alignment of the gathered loads. Align CommonAlignment = computeCommonAlignment(E->Scalars); NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment); @@ -15975,6 +15996,10 @@ void BoUpSLP::computeMinimumValueSizes() { auto It = MinBWs.find(TE); if (It != MinBWs.end() && It->second.first > UserTESz) return true; + // The size of icmp is always 1 and should not be + // considered. + if (TE->getOpcode() == Instruction::ICmp) + return true; return DL->getTypeSizeInBits(U->getType()) > UserTESz; })); })) { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index 809b15b200495..81d8b01fe7fb7 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -130,8 +130,16 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) { ; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: ; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: ; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> ; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll index 845ff078ee0eb..416742a94e0d3 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll @@ -45,8 +45,13 @@ exit: define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. 
Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 4402289ac170d..835622276ef27 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -511,6 +511,12 @@ define void @f92() sanitize_realtime ret void; } +; CHECK: define void @f93() #54 +define void @f93() nosanitize_realtime +{ + ret void; +} + ; CHECK: define void @f87() [[FNRETTHUNKEXTERN:#[0-9]+]] define void @f87() fn_ret_thunk_extern { ret void } @@ -606,6 +612,7 @@ define void @initializes(ptr initializes((-4, 0), (4, 8)) %a) { ; CHECK: attributes #51 = { uwtable(sync) } ; CHECK: attributes #52 = { nosanitize_bounds } ; CHECK: attributes #53 = { sanitize_realtime } +; CHECK: attributes #54 = { nosanitize_realtime } ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern } ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile } ; CHECK: attributes [[OPTDEBUG]] = { optdebug } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index fd60c49a4be39..c401cde8e146e 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1562,7 +1562,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #53 + ; CHECK: call void @f.nobuiltin() #54 call fastcc noalias ptr @f.noalias() noinline ; CHECK: call fastcc noalias ptr @f.noalias() #12 @@ -1992,6 +1992,9 @@ declare void @f.sanitize_numerical_stability() sanitize_numerical_stability declare void @f.sanitize_realtime() sanitize_realtime ; CHECK: declare void @f.sanitize_realtime() #52 +declare void @f.nosanitize_realtime() nosanitize_realtime +; CHECK: declare void @f.nosanitize_realtime() #53 + ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) @@ -2115,7 +2118,8 @@ define float @nofpclass_callsites(float %arg) { ; CHECK: attributes #50 = { allockind("alloc,uninitialized") } ; CHECK: attributes #51 = { sanitize_numerical_stability } ; CHECK: attributes #52 = { sanitize_realtime } -; CHECK: attributes #53 = { builtin } +; CHECK: attributes #53 = { nosanitize_realtime } +; CHECK: attributes #54 = { builtin } ;; Metadata diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll index 95f43ba512632..5d12d41ac3332 100644 --- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -13,14 +13,8 @@ define @bitcast_nxv8i16_to_nxv16i8( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -33,14 +27,8 @@ define @bitcast_nxv4i32_to_nxv16i8( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -53,14 +41,8 @@ define @bitcast_nxv2i64_to_nxv16i8( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -73,14 +55,8 @@ define @bitcast_nxv8f16_to_nxv16i8( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -93,14 +69,8 @@ define @bitcast_nxv4f32_to_nxv16i8( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -113,14 +83,8 @@ define @bitcast_nxv2f64_to_nxv16i8( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -133,14 +97,8 @@ define @bitcast_nxv8bf16_to_nxv16i8( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv16i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -157,14 +115,8 @@ define @bitcast_nxv16i8_to_nxv8i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -177,14 +129,10 @@ define @bitcast_nxv4i32_to_nxv8i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -197,14 +145,10 @@ define @bitcast_nxv2i64_to_nxv8i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -217,13 +161,6 @@ define @bitcast_nxv8f16_to_nxv8i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -236,14 +173,10 @@ define @bitcast_nxv4f32_to_nxv8i16( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -256,14 +189,10 @@ define @bitcast_nxv2f64_to_nxv8i16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -276,13 +205,6 @@ define @bitcast_nxv8bf16_to_nxv8i16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv8i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -299,14 +221,8 @@ define @bitcast_nxv16i8_to_nxv4i32( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -319,14 +235,10 @@ define @bitcast_nxv8i16_to_nxv4i32( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -339,14 +251,10 @@ define @bitcast_nxv2i64_to_nxv4i32( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -359,14 +267,10 @@ define @bitcast_nxv8f16_to_nxv4i32( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -379,13 +283,6 @@ define @bitcast_nxv4f32_to_nxv4i32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -398,14 +295,10 @@ define @bitcast_nxv2f64_to_nxv4i32( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -418,14 +311,10 @@ define @bitcast_nxv8bf16_to_nxv4i32( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv4i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -442,14 +331,8 @@ define @bitcast_nxv16i8_to_nxv2i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -462,14 +345,10 @@ define @bitcast_nxv8i16_to_nxv2i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -482,14 +361,10 @@ define @bitcast_nxv4i32_to_nxv2i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -502,14 +377,10 @@ define @bitcast_nxv8f16_to_nxv2i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -522,14 +393,10 @@ define @bitcast_nxv4f32_to_nxv2i64( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -542,13 +409,6 @@ define @bitcast_nxv2f64_to_nxv2i64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -561,14 +421,10 @@ define @bitcast_nxv8bf16_to_nxv2i64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv2i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -585,14 +441,8 @@ define @bitcast_nxv16i8_to_nxv8f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -605,13 +455,6 @@ define @bitcast_nxv8i16_to_nxv8f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -624,14 +467,10 @@ define @bitcast_nxv4i32_to_nxv8f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -644,14 +483,10 @@ define @bitcast_nxv2i64_to_nxv8f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -664,14 +499,10 @@ define @bitcast_nxv4f32_to_nxv8f16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -684,14 +515,10 @@ define @bitcast_nxv2f64_to_nxv8f16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -704,13 +531,6 @@ define @bitcast_nxv8bf16_to_nxv8f16( %v ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv8f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -727,14 +547,8 @@ define @bitcast_nxv16i8_to_nxv4f32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -747,14 +561,10 @@ define @bitcast_nxv8i16_to_nxv4f32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -767,13 +577,6 @@ define @bitcast_nxv4i32_to_nxv4f32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -786,14 +589,10 @@ define @bitcast_nxv2i64_to_nxv4f32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -806,14 +605,10 @@ define @bitcast_nxv8f16_to_nxv4f32( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -826,14 +621,10 @@ define @bitcast_nxv2f64_to_nxv4f32( %v ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -846,14 +637,10 @@ define @bitcast_nxv8bf16_to_nxv4f32( % ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv4f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -870,14 +657,8 @@ define @bitcast_nxv16i8_to_nxv2f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -890,14 +671,10 @@ define @bitcast_nxv8i16_to_nxv2f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -910,14 +687,10 @@ define @bitcast_nxv4i32_to_nxv2f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -930,13 +703,6 @@ define @bitcast_nxv2i64_to_nxv2f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -949,14 +715,10 @@ define @bitcast_nxv8f16_to_nxv2f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -969,14 +731,10 @@ define @bitcast_nxv4f32_to_nxv2f64( %v ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -989,14 +747,10 @@ define @bitcast_nxv8bf16_to_nxv2f64( ; ; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv2f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1013,14 +767,8 @@ define @bitcast_nxv16i8_to_nxv8bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1033,13 +781,6 @@ define @bitcast_nxv8i16_to_nxv8bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1052,14 +793,10 @@ define @bitcast_nxv4i32_to_nxv8bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1072,14 +809,10 @@ define @bitcast_nxv2i64_to_nxv8bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1092,13 +825,6 @@ define @bitcast_nxv8f16_to_nxv8bf16( %v ; ; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1111,14 +837,10 @@ define @bitcast_nxv4f32_to_nxv8bf16( % ; ; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1131,14 +853,10 @@ define @bitcast_nxv2f64_to_nxv8bf16( ; ; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv8bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1212,15 +930,9 @@ define @bitcast_nxv1i64_to_nxv8i8( %v) #0 { ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv8i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: uunpklo z0.h, z0.b -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1290,15 +1002,9 @@ define @bitcast_nxv1f64_to_nxv8i8( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv8i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: uunpklo z0.h, z0.b -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1400,15 +1106,11 @@ define @bitcast_nxv1i64_to_nxv4i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv4i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1420,15 +1122,11 @@ define @bitcast_nxv4f16_to_nxv4i16( %v) #0 ; CHECK-NEXT: ret ; ; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv4i16: -; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE: // %bb.0: +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1470,15 +1168,11 @@ define @bitcast_nxv1f64_to_nxv4i16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv4i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1491,14 +1185,10 @@ define @bitcast_nxv4bf16_to_nxv4i16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv4i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1572,15 +1262,11 @@ define @bitcast_nxv1i64_to_nxv2i32( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv2i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: uunpklo z0.d, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1621,14 +1307,10 @@ define @bitcast_nxv2f32_to_nxv2i32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv2i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1642,15 +1324,11 @@ define @bitcast_nxv1f64_to_nxv2i32( %v) ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv2i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: uunpklo z0.d, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1696,15 +1374,9 @@ define @bitcast_nxv8i8_to_nxv1i64( %v) #0 { ; ; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1718,15 +1390,11 @@ define @bitcast_nxv4i16_to_nxv1i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1740,15 +1408,11 @@ define @bitcast_nxv2i32_to_nxv1i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1762,20 +1426,14 @@ define @bitcast_nxv4f16_to_nxv1i64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-3 ; CHECK_BE-NEXT: ptrue p0.h ; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: ptrue p1.d +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #3 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1789,19 +1447,13 @@ define @bitcast_nxv2f32_to_nxv1i64( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-3 ; CHECK_BE-NEXT: ptrue p0.s ; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d ; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #2, mul vl] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #3 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1814,13 +1466,6 @@ define @bitcast_nxv1f64_to_nxv1i64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1834,20 +1479,14 @@ define @bitcast_nxv4bf16_to_nxv1i64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1i64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-3 ; CHECK_BE-NEXT: ptrue p0.h ; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: ptrue p1.d +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #3 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1892,14 +1531,10 @@ define @bitcast_nxv4i16_to_nxv4f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv4f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1941,15 +1576,11 @@ define @bitcast_nxv1i64_to_nxv4f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv4f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -1991,15 +1622,11 @@ define @bitcast_nxv1f64_to_nxv4f16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv4f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2012,13 +1639,6 @@ define @bitcast_nxv4bf16_to_nxv4f16( %v ; ; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv4f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2091,14 +1711,10 @@ define @bitcast_nxv2i32_to_nxv2f32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv2f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2112,15 +1728,11 @@ define @bitcast_nxv1i64_to_nxv2f32( %v) # ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv2f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: uunpklo z0.d, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2162,15 +1774,11 @@ define @bitcast_nxv1f64_to_nxv2f32( %v ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv2f32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: uunpklo z0.d, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2216,15 +1824,9 @@ define @bitcast_nxv8i8_to_nxv1f64( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2238,15 +1840,11 @@ define @bitcast_nxv4i16_to_nxv1f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2260,15 +1858,11 @@ define @bitcast_nxv2i32_to_nxv1f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2281,13 +1875,6 @@ define @bitcast_nxv1i64_to_nxv1f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2301,20 +1888,14 @@ define @bitcast_nxv4f16_to_nxv1f64( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-3 ; CHECK_BE-NEXT: ptrue p0.h ; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: ptrue p1.d +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #3 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2328,19 +1909,13 @@ define @bitcast_nxv2f32_to_nxv1f64( %v ; ; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-3 ; CHECK_BE-NEXT: ptrue p0.s ; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d ; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #2, mul vl] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #3 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: revb z0.d, p1/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2354,20 +1929,14 @@ define @bitcast_nxv4bf16_to_nxv1f64( ; ; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1f64: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-3 ; CHECK_BE-NEXT: ptrue p0.h ; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: ptrue p1.d +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #3 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2412,14 +1981,10 @@ define @bitcast_nxv4i16_to_nxv4bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv4bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2461,15 +2026,11 @@ define @bitcast_nxv1i64_to_nxv4bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv4bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2482,13 +2043,6 @@ define @bitcast_nxv4f16_to_nxv4bf16( %v ; ; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv4bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2530,15 +2084,11 @@ define @bitcast_nxv1f64_to_nxv4bf16( ; ; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv4bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2585,16 +2135,10 @@ define @bitcast_nxv1i32_to_nxv4i8( %v) #0 { ; ; CHECK_BE-LABEL: bitcast_nxv1i32_to_nxv4i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: uunpklo z0.h, z0.b ; CHECK_BE-NEXT: uunpklo z0.s, z0.h -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2699,16 +2243,12 @@ define @bitcast_nxv1i32_to_nxv2i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv1i32_to_nxv2i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.s -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.s, z0.h ; CHECK_BE-NEXT: uunpklo z0.d, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2721,14 +2261,10 @@ define @bitcast_nxv2f16_to_nxv2i16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv2i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2743,14 +2279,10 @@ define @bitcast_nxv2bf16_to_nxv2i16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv2i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.d -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.d +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2769,16 +2301,10 @@ define @bitcast_nxv4i8_to_nxv1i32( %v) #0 { ; ; CHECK_BE-LABEL: bitcast_nxv4i8_to_nxv1i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.s +; CHECK_BE-NEXT: ptrue p0.s ; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2793,16 +2319,12 @@ define @bitcast_nxv2i16_to_nxv1i32( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv1i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.s ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h +; CHECK_BE-NEXT: ptrue p0.s +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2824,15 +2346,15 @@ define @bitcast_nxv2f16_to_nxv1i32( %v) #0 ; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv1i32: ; CHECK_BE: // %bb.0: ; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-2 +; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d ; CHECK_BE-NEXT: ptrue p1.h ; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp] ; CHECK_BE-NEXT: ptrue p0.s ; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #2 +; CHECK_BE-NEXT: revb z0.h, p1/m, z0.h +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: addvl sp, sp, #1 ; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to @@ -2857,15 +2379,15 @@ define @bitcast_nxv2bf16_to_nxv1i32( %v) ; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv1i32: ; CHECK_BE: // %bb.0: ; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-2 +; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d ; CHECK_BE-NEXT: ptrue p1.h ; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp] ; CHECK_BE-NEXT: ptrue p0.s ; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl] -; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #2 +; CHECK_BE-NEXT: revb z0.h, p1/m, z0.h +; CHECK_BE-NEXT: revb z0.s, p0/m, z0.s +; CHECK_BE-NEXT: addvl sp, sp, #1 ; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to @@ -2911,14 +2433,10 @@ define @bitcast_nxv2i16_to_nxv2f16( %v) #0 ; ; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv2f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2934,13 +2452,6 @@ define @bitcast_nxv2bf16_to_nxv2f16( %v ; ; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv2f16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] -; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -2995,14 +2506,10 @@ define @bitcast_nxv2i16_to_nxv2bf16( %v) ; ; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv2bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: ptrue p1.h -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: ptrue p0.h +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -3017,13 +2524,6 @@ define @bitcast_nxv2f16_to_nxv2bf16( %v ; ; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv2bf16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 -; CHECK_BE-NEXT: ptrue p0.d -; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] -; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -3045,17 +2545,11 @@ define @bitcast_nxv1i16_to_nxv2i8( %v) #0 { ; ; CHECK_BE-LABEL: bitcast_nxv1i16_to_nxv2i8: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.h -; CHECK_BE-NEXT: ptrue p1.b -; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp] -; CHECK_BE-NEXT: ld1b { z0.b }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: uunpklo z0.h, z0.b ; CHECK_BE-NEXT: uunpklo z0.s, z0.h ; CHECK_BE-NEXT: uunpklo z0.d, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -3078,17 +2572,11 @@ define @bitcast_nxv2i8_to_nxv1i16( %v) #0 { ; ; CHECK_BE-LABEL: bitcast_nxv2i8_to_nxv1i16: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK_BE-NEXT: ptrue p0.b -; CHECK_BE-NEXT: ptrue p1.h +; CHECK_BE-NEXT: ptrue p0.h ; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp] -; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.h, p0/m, z0.h ; CHECK_BE-NEXT: ret %bc = bitcast %v to ret %bc @@ -3126,15 +2614,11 @@ define @bitcast_short_float_to_i32( %v) ; ; CHECK_BE-LABEL: bitcast_short_float_to_i32: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d ; CHECK_BE-NEXT: ptrue p1.s ; CHECK_BE-NEXT: fcvt z0.s, p0/m, z0.d -; CHECK_BE-NEXT: st1w { z0.s }, p1, [sp] -; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d ; CHECK_BE-NEXT: ret %trunc = fptrunc %v to %bitcast = bitcast %trunc to @@ -3150,15 +2634,11 @@ define @bitcast_short_i32_to_float( %v) ; ; CHECK_BE-LABEL: bitcast_short_i32_to_float: ; CHECK_BE: // %bb.0: -; CHECK_BE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK_BE-NEXT: addvl sp, sp, #-1 ; CHECK_BE-NEXT: ptrue p0.d ; CHECK_BE-NEXT: ptrue p1.s -; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp] -; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp] +; CHECK_BE-NEXT: revb z0.d, p0/m, z0.d +; CHECK_BE-NEXT: revb z0.s, p1/m, z0.s ; CHECK_BE-NEXT: fcvt z0.d, p0/m, z0.s -; CHECK_BE-NEXT: addvl sp, sp, #1 -; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK_BE-NEXT: ret %trunc = trunc %v to %bitcast = bitcast %trunc to diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir index 7730dde20395d..cd07de690cbde 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -17,20 +17,22 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] + ; ; VI-LABEL: name: fptosi_s32_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %1 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] + ; ; GFX11-LABEL: name: fptosi_s32_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -50,20 +52,22 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] + ; ; VI-LABEL: name: fptosi_s32_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %1 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] + ; ; GFX11-LABEL: name: fptosi_s32_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; 
GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -83,20 +87,22 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] + ; ; VI-LABEL: name: fptosi_s32_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] + ; ; GFX11-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -117,23 +123,25 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] + ; ; VI-LABEL: name: fptosi_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] + ; ; GFX11-LABEL: name: fptosi_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 
[[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -154,23 +162,25 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] + ; ; VI-LABEL: name: fptosi_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] + ; ; GFX11-LABEL: name: fptosi_s16_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -193,27 +203,29 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] + ; ; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 
implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] + ; ; GFX11-LABEL: name: fptosi_s16_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %3 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -235,23 +247,25 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] + ; ; VI-LABEL: name: fptosi_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] + ; ; GFX11-LABEL: name: fptosi_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: 
S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -273,23 +287,25 @@ body: | ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] + ; ; VI-LABEL: name: fptosi_s16_to_s1_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] + ; ; GFX11-LABEL: name: fptosi_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -313,27 +329,29 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] + ; ; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, 
implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %3 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] + ; ; GFX11-LABEL: name: fptosi_s16_to_s1_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll index 13d59c6caf6c9..40b3b2c712272 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandle.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll @@ -1,4 +1,14 @@ -; RUN: opt -S -dxil-op-lower %s | FileCheck %s +; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s +; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s + +; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count +; CHECK-PRETTY: ---------- ------- ----------- ------- -------------- --------- +; CHECK-PRETTY: SRV f32 buf T0 t0 unbounded +; CHECK-PRETTY: SRV byte r/o T1 t8,space1 1 +; CHECK-PRETTY: SRV struct r/o T2 t2,space4 1 +; CHECK-PRETTY: SRV u32 buf T3 t3,space5 24 +; CHECK-PRETTY: UAV i32 buf U0 u7,space2 1 +; CHECK-PRETTY: UAV f32 buf U1 u5,space3 1 target triple = "dxil-pc-shadermodel6.0-compute" @@ -50,4 +60,12 @@ define void @test_buffers() { ret void } +; Just check that we have the right types and number of metadata nodes, the +; contents of the metadata are tested elsewhere. 
+; +; CHECK: !dx.resources = !{[[RESMD:![0-9]+]]} +; CHECK: [[RESMD]] = !{[[SRVMD:![0-9]+]], [[UAVMD:![0-9]+]], null, null} +; CHECK-DAG: [[SRVMD]] = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +; CHECK-DAG: [[UAVMD]] = !{!{{[0-9]+}}, !{{[0-9]+}}} + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll index e78a0bf02e4ae..d0c80c018b8d7 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll @@ -1,4 +1,14 @@ -; RUN: opt -S -dxil-op-lower %s | FileCheck %s +; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s +; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s + +; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count +; CHECK-PRETTY: ---------- ------- ----------- ------- -------------- --------- +; CHECK-PRETTY: SRV f32 buf T0 t0 unbounded +; CHECK-PRETTY: SRV byte r/o T1 t8,space1 1 +; CHECK-PRETTY: SRV struct r/o T2 t2,space4 1 +; CHECK-PRETTY: SRV u32 buf T3 t3,space5 24 +; CHECK-PRETTY: UAV i32 buf U0 u7,space2 1 +; CHECK-PRETTY: UAV f32 buf U1 u5,space3 1 target triple = "dxil-pc-shadermodel6.6-compute" @@ -55,4 +65,12 @@ define void @test_bindings() { ret void } +; Just check that we have the right types and number of metadata nodes, the +; contents of the metadata are tested elsewhere. +; +; CHECK: !dx.resources = !{[[RESMD:![0-9]+]]} +; CHECK: [[RESMD]] = !{[[SRVMD:![0-9]+]], [[UAVMD:![0-9]+]], null, null} +; CHECK-DAG: [[SRVMD]] = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +; CHECK-DAG: [[UAVMD]] = !{!{{[0-9]+}}, !{{[0-9]+}}} + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/DirectX/all.ll b/llvm/test/CodeGen/DirectX/all.ll new file mode 100644 index 0000000000000..1c0b6486dc935 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/all.ll @@ -0,0 +1,83 @@ +; RUN: opt -S -passes=dxil-intrinsic-expansion,dxil-op-lower -mtriple=dxil-pc-shadermodel6.0-library < %s | FileCheck %s + +; Make sure dxil operation function calls for all are generated for float and half. 
+ +; CHECK-LABEL: all_bool +; CHECK: icmp ne i1 %{{.*}}, false +define noundef i1 @all_bool(i1 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i1(i1 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int64_t +; CHECK: icmp ne i64 %{{.*}}, 0 +define noundef i1 @all_int64_t(i64 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i64(i64 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int +; CHECK: icmp ne i32 %{{.*}}, 0 +define noundef i1 @all_int(i32 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i32(i32 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int16_t +; CHECK: icmp ne i16 %{{.*}}, 0 +define noundef i1 @all_int16_t(i16 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i16(i16 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_double +; CHECK: fcmp une double %{{.*}}, 0.000000e+00 +define noundef i1 @all_double(double noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f64(double %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_float +; CHECK: fcmp une float %{{.*}}, 0.000000e+00 +define noundef i1 @all_float(float noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f32(float %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_half +; CHECK: fcmp une half %{{.*}}, 0xH0000 +define noundef i1 @all_half(half noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f16(half %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_bool4 +; CHECK: icmp ne <4 x i1> %{{.*}}, zeroinitialize +; CHECK: extractelement <4 x i1> %{{.*}}, i64 0 +; CHECK: extractelement <4 x i1> %{{.*}}, i64 1 +; CHECK: and i1 %{{.*}}, %{{.*}} +; CHECK: extractelement <4 x i1> %{{.*}}, i64 2 +; CHECK: and i1 %{{.*}}, %{{.*}} +; CHECK: extractelement <4 x i1> %{{.*}}, i64 3 +; CHECK: and i1 %{{.*}}, %{{.*}} +define noundef i1 @all_bool4(<4 x i1> noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.v4i1(<4 x i1> %p0) + ret i1 %dx.all +} + +declare i1 @llvm.dx.all.v4i1(<4 x i1>) +declare i1 @llvm.dx.all.i1(i1) +declare i1 @llvm.dx.all.i16(i16) +declare i1 @llvm.dx.all.i32(i32) +declare i1 @llvm.dx.all.i64(i64) +declare i1 @llvm.dx.all.f16(half) +declare i1 @llvm.dx.all.f32(float) +declare i1 @llvm.dx.all.f64(double) diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/interactive_main.py b/llvm/test/CodeGen/MLRegAlloc/Inputs/interactive_main.py index 53809b0a04008..1f62a5c6c9a3b 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/interactive_main.py +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/interactive_main.py @@ -2,6 +2,7 @@ import interactive_host import sys +from typing import Sequence def main(args): # this advisor just picks the first legal register to evict, which is @@ -9,7 +10,7 @@ def main(args): class Advisor: to_return = False - def advice(self, tensor_values: list[log_reader.TensorValue]): + def advice(self, tensor_values: Sequence[log_reader.TensorValue]): for tv in tensor_values: if tv.spec().name != "mask": continue diff --git a/llvm/test/CodeGen/MLRegAlloc/lit.local.cfg b/llvm/test/CodeGen/MLRegAlloc/lit.local.cfg deleted file mode 100644 index e8c7912650cb8..0000000000000 --- a/llvm/test/CodeGen/MLRegAlloc/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -import sys - -config.unsupported = sys.version_info.minor <= 8 diff --git a/llvm/test/CodeGen/Mips/fp-fcanonicalize.ll b/llvm/test/CodeGen/Mips/fp-fcanonicalize.ll new file mode 100644 index 0000000000000..1faf6dd0891a2 --- /dev/null +++ b/llvm/test/CodeGen/Mips/fp-fcanonicalize.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=mipsisa32r6 < %s | FileCheck %s --check-prefix=MIPS32R6 
+; RUN: llc --mtriple=mips < %s | FileCheck %s --check-prefix=MIPS32R2 +; RUN: llc --mtriple=mips64 < %s | FileCheck %s --check-prefix=MIPS64R2 + +declare float @llvm.fcanonicalize.f32(float) +declare double @llvm.fcanonicalize.f64(double) + +define float @fcanonicalize_float(float %x) { +; MIPS32R6-LABEL: fcanonicalize_float: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.s $f0, $f12, $f12 +; +; MIPS32R2-LABEL: fcanonicalize_float: +; MIPS32R2: # %bb.0: +; MIPS32R2-NEXT: mov.s $f0, $f12 +; MIPS32R2-NEXT: add.s $f1, $f12, $f12 +; MIPS32R2-NEXT: c.un.s $f12, $f12 +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: movt.s $f0, $f1, $fcc0 +; +; MIPS64R2-LABEL: fcanonicalize_float: +; MIPS64R2: # %bb.0: +; MIPS64R2-NEXT: mov.s $f0, $f12 +; MIPS64R2-NEXT: add.s $f1, $f12, $f12 +; MIPS64R2-NEXT: c.un.s $f12, $f12 +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: movt.s $f0, $f1, $fcc0 + %z = call float @llvm.canonicalize.f32(float %x) + ret float %z +} + +define float @fcanonicalize_float_nnan(float %x) { +; MIPS32R6-LABEL: fcanonicalize_float_nnan: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.s $f0, $f12, $f12 +; +; MIPS32R2-LABEL: fcanonicalize_float_nnan: +; MIPS32R2: # %bb.0: +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: mov.s $f0, $f12 +; +; MIPS64R2-LABEL: fcanonicalize_float_nnan: +; MIPS64R2: # %bb.0: +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: mov.s $f0, $f12 + %z = call nnan float @llvm.canonicalize.f32(float %x) + ret float %z +} + + +define double @fcanonicalize_double(double %x) { +; MIPS32R6-LABEL: fcanonicalize_double: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.d $f0, $f12, $f12 +; +; MIPS32R2-LABEL: fcanonicalize_double: +; MIPS32R2: # %bb.0: +; MIPS32R2-NEXT: mov.d $f0, $f12 +; MIPS32R2-NEXT: add.d $f2, $f12, $f12 +; MIPS32R2-NEXT: c.un.d $f12, $f12 +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: movt.d $f0, $f2, $fcc0 +; +; MIPS64R2-LABEL: fcanonicalize_double: +; MIPS64R2: # %bb.0: +; MIPS64R2-NEXT: mov.d $f0, $f12 +; MIPS64R2-NEXT: add.d $f1, $f12, $f12 +; MIPS64R2-NEXT: c.un.d $f12, $f12 +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: movt.d $f0, $f1, $fcc0 + %z = call double @llvm.canonicalize.f64(double %x) + ret double %z +} + +define double @fcanonicalize_double_nnan(double %x) { +; MIPS32R6-LABEL: fcanonicalize_double_nnan: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.d $f0, $f12, $f12 +; +; MIPS32R2-LABEL: fcanonicalize_double_nnan: +; MIPS32R2: # %bb.0: +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: mov.d $f0, $f12 +; +; MIPS64R2-LABEL: fcanonicalize_double_nnan: +; MIPS64R2: # %bb.0: +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: mov.d $f0, $f12 + %z = call nnan double @llvm.canonicalize.f64(double %x) + ret double %z +} + diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll index 70b421f8c0c5f..4a17384e49993 100644 --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -59,7 +59,6 @@ ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: PowerPC Pre-Emit Peephole -; CHECK-NEXT: PowerPC Expand ISEL Generation ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll index 60d42704ca795..39b23a57513d9 100644 --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -212,7 +212,6 
@@ ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: PowerPC Pre-Emit Peephole -; CHECK-NEXT: PowerPC Expand ISEL Generation ; CHECK-NEXT: PowerPC Early-Return Creation ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/PowerPC/crbit-asm.ll b/llvm/test/CodeGen/PowerPC/crbit-asm.ll index 617d6ec27b63f..2062aa3e34417 100644 --- a/llvm/test/CodeGen/PowerPC/crbit-asm.ll +++ b/llvm/test/CodeGen/PowerPC/crbit-asm.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -O1 -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s -; RUN: llc -verify-machineinstrs -O1 -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -O1 -mcpu=pwr7 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -26,15 +26,13 @@ define zeroext i1 @testi1(i1 zeroext %b1, i1 zeroext %b2) #0 { ; CHECK-NO-ISEL-NEXT: andi. 3, 3, 1 ; CHECK-NO-ISEL-NEXT: crmove 20, 1 ; CHECK-NO-ISEL-NEXT: andi. 3, 4, 1 -; CHECK-NO-ISEL-NEXT: li 3, 0 -; CHECK-NO-ISEL-NEXT: li 4, 1 +; CHECK-NO-ISEL-NEXT: li 3, 1 ; CHECK-NO-ISEL-NEXT: #APP ; CHECK-NO-ISEL-NEXT: crand 20, 20, 1 ; CHECK-NO-ISEL-NEXT: #NO_APP -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB0_1 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB0_1: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: blr entry: %0 = tail call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i1 %b1, i1 %b2) #0 @@ -63,15 +61,13 @@ define signext i32 @testi32(i32 signext %b1, i32 signext %b2) #0 { ; CHECK-NO-ISEL-NEXT: andi. 3, 3, 1 ; CHECK-NO-ISEL-NEXT: crmove 20, 1 ; CHECK-NO-ISEL-NEXT: andi. 3, 4, 1 -; CHECK-NO-ISEL-NEXT: li 3, 0 -; CHECK-NO-ISEL-NEXT: li 4, -1 +; CHECK-NO-ISEL-NEXT: li 3, -1 ; CHECK-NO-ISEL-NEXT: #APP ; CHECK-NO-ISEL-NEXT: crand 20, 20, 1 ; CHECK-NO-ISEL-NEXT: #NO_APP -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB1_1 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB1_1: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: blr entry: %0 = tail call i32 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i32 %b1, i32 %b2) #0 @@ -101,15 +97,13 @@ define zeroext i8 @testi8(i8 zeroext %b1, i8 zeroext %b2) #0 { ; CHECK-NO-ISEL-NEXT: andi. 3, 3, 1 ; CHECK-NO-ISEL-NEXT: crmove 20, 1 ; CHECK-NO-ISEL-NEXT: andi. 
3, 4, 1 -; CHECK-NO-ISEL-NEXT: li 3, 0 -; CHECK-NO-ISEL-NEXT: li 4, 1 +; CHECK-NO-ISEL-NEXT: li 3, 1 ; CHECK-NO-ISEL-NEXT: #APP ; CHECK-NO-ISEL-NEXT: crand 20, 20, 1 ; CHECK-NO-ISEL-NEXT: #NO_APP -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB2_1 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB2_1: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: blr entry: %0 = tail call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i8 %b1, i8 %b2) #0 diff --git a/llvm/test/CodeGen/PowerPC/crbits.ll b/llvm/test/CodeGen/PowerPC/crbits.ll index a682f69a2ceb7..763f596777a64 100644 --- a/llvm/test/CodeGen/PowerPC/crbits.ll +++ b/llvm/test/CodeGen/PowerPC/crbits.ll @@ -2,7 +2,7 @@ ; RUN: llc -ppc-gpr-icmps=all -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s ; RUN: llc -ppc-gpr-icmps=all -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | \ +; RUN: -verify-machineinstrs -mcpu=pwr7 -mattr=-isel < %s | \ ; RUN: FileCheck --check-prefix=CHECK-NO-ISEL %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 -ppc-gpr-icmps=none < %s | \ @@ -30,16 +30,16 @@ define zeroext i1 @test1(float %v1, float %v2) #0 { ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: fcmpu 0, 1, 2 ; CHECK-NO-ISEL-NEXT: xxlxor 0, 0, 0 -; CHECK-NO-ISEL-NEXT: li 3, 1 +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: fcmpu 1, 2, 2 ; CHECK-NO-ISEL-NEXT: crnor 20, 3, 0 ; CHECK-NO-ISEL-NEXT: fcmpu 0, 2, 0 -; CHECK-NO-ISEL-NEXT: crnor 21, 7, 1 -; CHECK-NO-ISEL-NEXT: crnand 20, 20, 21 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB0_1 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB0_1: # %entry -; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: bclr 4, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: crnor 20, 7, 1 +; CHECK-NO-ISEL-NEXT: bclr 4, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 1 ; CHECK-NO-ISEL-NEXT: blr ; ; CHECK-P10-LABEL: test1: @@ -81,16 +81,15 @@ define zeroext i1 @test2(float %v1, float %v2) #0 { ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: fcmpu 0, 1, 2 ; CHECK-NO-ISEL-NEXT: xxlxor 0, 0, 0 -; CHECK-NO-ISEL-NEXT: li 3, 1 +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: fcmpu 1, 2, 2 ; CHECK-NO-ISEL-NEXT: crnor 20, 3, 0 ; CHECK-NO-ISEL-NEXT: fcmpu 0, 2, 0 ; CHECK-NO-ISEL-NEXT: crnor 21, 7, 1 ; CHECK-NO-ISEL-NEXT: creqv 20, 20, 21 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB1_1 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB1_1: # %entry -; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 1 ; CHECK-NO-ISEL-NEXT: blr ; ; CHECK-P10-LABEL: test2: @@ -134,7 +133,7 @@ define zeroext i1 @test3(float %v1, float %v2, i32 signext %x) #0 { ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: fcmpu 0, 1, 2 ; CHECK-NO-ISEL-NEXT: xxlxor 0, 0, 0 -; CHECK-NO-ISEL-NEXT: li 3, 1 +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: fcmpu 1, 2, 2 ; CHECK-NO-ISEL-NEXT: crnor 20, 3, 0 ; CHECK-NO-ISEL-NEXT: fcmpu 0, 2, 0 @@ -142,10 +141,9 @@ define zeroext i1 @test3(float %v1, float %v2, i32 signext %x) #0 { ; CHECK-NO-ISEL-NEXT: cmpwi 5, -2 ; CHECK-NO-ISEL-NEXT: crandc 21, 21, 2 ; CHECK-NO-ISEL-NEXT: creqv 20, 20, 21 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB2_1 -; CHECK-NO-ISEL-NEXT: blr -; 
CHECK-NO-ISEL-NEXT: .LBB2_1: # %entry -; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 20, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 1 ; CHECK-NO-ISEL-NEXT: blr ; ; CHECK-P10-LABEL: test3: @@ -301,10 +299,9 @@ define signext i32 @test7(i1 zeroext %v2, i32 signext %i1, i32 signext %i2) #0 { ; CHECK-NO-ISEL-NEXT: andi. 3, 3, 1 ; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB6_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 5, 0 -; CHECK-NO-ISEL-NEXT: blr +; CHECK-NO-ISEL-NEXT: mr 4, 5 ; CHECK-NO-ISEL-NEXT: .LBB6_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr ; ; CHECK-P10-LABEL: test7: @@ -330,12 +327,10 @@ define signext i32 @exttest7(i32 signext %a) #0 { ; CHECK-NO-ISEL-LABEL: exttest7: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmplwi 3, 5 +; CHECK-NO-ISEL-NEXT: li 3, 7 +; CHECK-NO-ISEL-NEXT: beqlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry ; CHECK-NO-ISEL-NEXT: li 3, 8 -; CHECK-NO-ISEL-NEXT: li 4, 7 -; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB7_1 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB7_1: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 ; CHECK-NO-ISEL-NEXT: blr ; ; CHECK-P10-LABEL: exttest7: @@ -366,15 +361,15 @@ define zeroext i32 @exttest8() #0 { ; CHECK-NO-ISEL-LABEL: exttest8: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: ld 3, 0(3) +; CHECK-NO-ISEL-NEXT: li 4, 0 ; CHECK-NO-ISEL-NEXT: subfic 3, 3, 80 ; CHECK-NO-ISEL-NEXT: rldicl 3, 3, 63, 1 ; CHECK-NO-ISEL-NEXT: cmplwi 3, 80 -; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB8_1 -; CHECK-NO-ISEL-NEXT: b .LBB8_2 -; CHECK-NO-ISEL-NEXT: .LBB8_1: # %entry -; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: bgt 0, .LBB8_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 4, 3 ; CHECK-NO-ISEL-NEXT: .LBB8_2: # %entry -; CHECK-NO-ISEL-NEXT: clrldi 3, 3, 32 +; CHECK-NO-ISEL-NEXT: clrldi 3, 4, 32 ; CHECK-NO-ISEL-NEXT: blr ; ; CHECK-P10-LABEL: exttest8: diff --git a/llvm/test/CodeGen/PowerPC/expand-contiguous-isel.ll b/llvm/test/CodeGen/PowerPC/expand-contiguous-isel.ll index 15b7dc1a38fab..9e53c7e88b0e3 100644 --- a/llvm/test/CodeGen/PowerPC/expand-contiguous-isel.ll +++ b/llvm/test/CodeGen/PowerPC/expand-contiguous-isel.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" ; This file mainly tests that one of the ISEL instruction in the group uses the same register for operand RT, RA, RB @@ -13,8 +14,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; After that we have: ; updated: 1504B %vreg83 = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq -; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE -; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel +; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -mattr=+isel < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE +; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -mattr=-isel < %s | FileCheck %s --implicit-check-not isel @.str = private unnamed_addr constant [3 x i8] c"]]\00", align 1 @.str.1 = private unnamed_addr constant [35 x i8] c"Index < Length && \22Invalid index!\22\00", align 1 @@ -23,6 +24,219 @@ target triple = "powerpc64le-unknown-linux-gnu" @.str.3 = private 
unnamed_addr constant [95 x i8] c"(data || length == 0) && \22StringRef cannot be built from a NULL argument with non-null length\22\00", align 1 @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm = private unnamed_addr constant [49 x i8] c"llvm::StringRef::StringRef(const char *, size_t)\00", align 1 define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) { +; CHECK-GEN-ISEL-TRUE-LABEL: _Z3fn1N4llvm9StringRefE: +; CHECK-GEN-ISEL-TRUE: # %bb.0: # %entry +; CHECK-GEN-ISEL-TRUE-NEXT: mflr r0 +; CHECK-GEN-ISEL-TRUE-NEXT: stdu r1, -32(r1) +; CHECK-GEN-ISEL-TRUE-NEXT: std r0, 48(r1) +; CHECK-GEN-ISEL-TRUE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GEN-ISEL-TRUE-NEXT: .cfi_offset lr, 16 +; CHECK-GEN-ISEL-TRUE-NEXT: li r5, 2 +; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x6 +; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_3 +; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 4 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_1: # %_ZNK4llvm9StringRefixEm.exit +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r7, 93 +; CHECK-GEN-ISEL-TRUE-NEXT: addi r7, r6, -1 +; CHECK-GEN-ISEL-TRUE-NEXT: iseleq r6, r7, r6 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_2: # %_ZNK4llvm9StringRef6substrEmm.exit +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: addi r4, r4, -1 +; CHECK-GEN-ISEL-TRUE-NEXT: addi r3, r3, 1 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_3: # %while.cond.outer +; CHECK-GEN-ISEL-TRUE-NEXT: # =>This Loop Header: Depth=1 +; CHECK-GEN-ISEL-TRUE-NEXT: # Child Loop BB0_5 Depth 2 +; CHECK-GEN-ISEL-TRUE-NEXT: # Child Loop BB0_8 Depth 2 +; CHECK-GEN-ISEL-TRUE-NEXT: cmpldi r6, 0 +; CHECK-GEN-ISEL-TRUE-NEXT: beq cr0, .LBB0_8 +; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.4: # %while.cond.preheader +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: cmpldi r4, 0 +; CHECK-GEN-ISEL-TRUE-NEXT: beq- cr0, .LBB0_15 +; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 5 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_5: # %_ZNK4llvm9StringRefixEm.exit +; CHECK-GEN-ISEL-TRUE-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-GEN-ISEL-TRUE-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-GEN-ISEL-TRUE-NEXT: lbz r7, 0(r3) +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r7, 92 +; CHECK-GEN-ISEL-TRUE-NEXT: bne cr0, .LBB0_1 +; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.6: # %if.then4 +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: cmpldi r4, 2 +; CHECK-GEN-ISEL-TRUE-NEXT: isellt r7, r4, r5 +; CHECK-GEN-ISEL-TRUE-NEXT: add r3, r3, r7 +; CHECK-GEN-ISEL-TRUE-NEXT: sub. 
r4, r4, r7 +; CHECK-GEN-ISEL-TRUE-NEXT: bne+ cr0, .LBB0_5 +; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_15 +; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 5 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_7: # %if.then4.us +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: isellt r6, r4, r5 +; CHECK-GEN-ISEL-TRUE-NEXT: add r3, r3, r6 +; CHECK-GEN-ISEL-TRUE-NEXT: sub r4, r4, r6 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_8: # %while.cond.us +; CHECK-GEN-ISEL-TRUE-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-GEN-ISEL-TRUE-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-GEN-ISEL-TRUE-NEXT: cmpldi r4, 2 +; CHECK-GEN-ISEL-TRUE-NEXT: bge cr0, .LBB0_10 +; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.9: # %if.end.us +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: cmpldi cr1, r4, 0 +; CHECK-GEN-ISEL-TRUE-NEXT: bne+ cr1, .LBB0_11 +; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_15 +; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 5 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_10: # %if.end.i.i.us +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: lhz r6, 0(r3) +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi cr1, r6, 23901 +; CHECK-GEN-ISEL-TRUE-NEXT: beq cr1, .LBB0_14 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_11: # %_ZNK4llvm9StringRefixEm.exit.us +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: lbz r6, 0(r3) +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi cr1, r6, 92 +; CHECK-GEN-ISEL-TRUE-NEXT: beq cr1, .LBB0_7 +; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.12: # %_ZNK4llvm9StringRefixEm.exit.us +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r6, 93 +; CHECK-GEN-ISEL-TRUE-NEXT: beq cr0, .LBB0_16 +; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.13: # %_ZNK4llvm9StringRef6substrEmm.exit.loopexit +; CHECK-GEN-ISEL-TRUE-NEXT: # +; CHECK-GEN-ISEL-TRUE-NEXT: li r6, 0 +; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_2 +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_14: # %if.then +; CHECK-GEN-ISEL-TRUE-NEXT: addi r1, r1, 32 +; CHECK-GEN-ISEL-TRUE-NEXT: ld r0, 16(r1) +; CHECK-GEN-ISEL-TRUE-NEXT: mtlr r0 +; CHECK-GEN-ISEL-TRUE-NEXT: blr +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_15: # %cond.false.i +; CHECK-GEN-ISEL-TRUE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha +; CHECK-GEN-ISEL-TRUE-NEXT: li r5, 225 +; CHECK-GEN-ISEL-TRUE-NEXT: addi r4, r3, .L__ModuleStringPool@toc@l +; CHECK-GEN-ISEL-TRUE-NEXT: addi r3, r4, 53 +; CHECK-GEN-ISEL-TRUE-NEXT: addi r6, r4, 88 +; CHECK-GEN-ISEL-TRUE-NEXT: bl __assert_fail +; CHECK-GEN-ISEL-TRUE-NEXT: nop +; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_16: # %if.then9 +; CHECK-GEN-ISEL-TRUE-NEXT: li r3, 1 +; CHECK-GEN-ISEL-TRUE-NEXT: bl exit +; CHECK-GEN-ISEL-TRUE-NEXT: nop +; +; CHECK-LABEL: _Z3fn1N4llvm9StringRefE: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: # implicit-def: $x5 +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %_ZNK4llvm9StringRef6substrEmm.exit +; CHECK-NEXT: # +; CHECK-NEXT: addi r4, r4, -1 +; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: .LBB0_2: # %while.cond.outer +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_5 Depth 2 +; CHECK-NEXT: # Child Loop BB0_9 Depth 2 +; CHECK-NEXT: cmpldi r5, 0 +; CHECK-NEXT: beq cr0, .LBB0_9 +; CHECK-NEXT: # %bb.3: # %while.cond.preheader +; CHECK-NEXT: # +; CHECK-NEXT: cmpldi r4, 0 +; CHECK-NEXT: bne+ cr0, .LBB0_5 +; CHECK-NEXT: b .LBB0_20 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_4: # %if.then4 +; CHECK-NEXT: # +; CHECK-NEXT: add r3, r3, r6 +; CHECK-NEXT: sub. 
r4, r4, r6 +; CHECK-NEXT: beq- cr0, .LBB0_20 +; CHECK-NEXT: .LBB0_5: # %_ZNK4llvm9StringRefixEm.exit +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lbz r6, 0(r3) +; CHECK-NEXT: cmplwi r6, 92 +; CHECK-NEXT: bne cr0, .LBB0_15 +; CHECK-NEXT: # %bb.6: # %if.then4 +; CHECK-NEXT: # +; CHECK-NEXT: cmpldi r4, 2 +; CHECK-NEXT: mr r6, r4 +; CHECK-NEXT: blt cr0, .LBB0_4 +; CHECK-NEXT: # %bb.7: # %if.then4 +; CHECK-NEXT: # +; CHECK-NEXT: li r6, 2 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_8: # %if.then4.us +; CHECK-NEXT: # +; CHECK-NEXT: add r3, r3, r5 +; CHECK-NEXT: sub r4, r4, r5 +; CHECK-NEXT: .LBB0_9: # %while.cond.us +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: cmpldi r4, 2 +; CHECK-NEXT: bge cr0, .LBB0_11 +; CHECK-NEXT: # %bb.10: # %if.end.us +; CHECK-NEXT: # +; CHECK-NEXT: cmpldi cr1, r4, 0 +; CHECK-NEXT: bne+ cr1, .LBB0_12 +; CHECK-NEXT: b .LBB0_20 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_11: # %if.end.i.i.us +; CHECK-NEXT: # +; CHECK-NEXT: lhz r5, 0(r3) +; CHECK-NEXT: cmplwi cr1, r5, 23901 +; CHECK-NEXT: beq cr1, .LBB0_19 +; CHECK-NEXT: .LBB0_12: # %_ZNK4llvm9StringRefixEm.exit.us +; CHECK-NEXT: # +; CHECK-NEXT: lbz r5, 0(r3) +; CHECK-NEXT: cmplwi cr1, r5, 92 +; CHECK-NEXT: bne cr1, .LBB0_17 +; CHECK-NEXT: # %bb.13: # %if.then4.us +; CHECK-NEXT: # +; CHECK-NEXT: mr r5, r4 +; CHECK-NEXT: bc 12, lt, .LBB0_8 +; CHECK-NEXT: # %bb.14: # %if.then4.us +; CHECK-NEXT: # +; CHECK-NEXT: li r5, 2 +; CHECK-NEXT: b .LBB0_8 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_15: # %_ZNK4llvm9StringRefixEm.exit +; CHECK-NEXT: # +; CHECK-NEXT: cmplwi r6, 93 +; CHECK-NEXT: bne cr0, .LBB0_1 +; CHECK-NEXT: # %bb.16: # %if.end10 +; CHECK-NEXT: # +; CHECK-NEXT: addi r5, r5, -1 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_17: # %_ZNK4llvm9StringRefixEm.exit.us +; CHECK-NEXT: # +; CHECK-NEXT: cmplwi r5, 93 +; CHECK-NEXT: beq cr0, .LBB0_21 +; CHECK-NEXT: # %bb.18: # %_ZNK4llvm9StringRef6substrEmm.exit.loopexit +; CHECK-NEXT: # +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_19: # %if.then +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_20: # %cond.false.i +; CHECK-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha +; CHECK-NEXT: li r5, 225 +; CHECK-NEXT: addi r4, r3, .L__ModuleStringPool@toc@l +; CHECK-NEXT: addi r3, r4, 53 +; CHECK-NEXT: addi r6, r4, 88 +; CHECK-NEXT: bl __assert_fail +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_21: # %if.then9 +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: bl exit +; CHECK-NEXT: nop entry: %Str.coerce.fca.0.extract = extractvalue [2 x i64] %Str.coerce, 0 %Str.coerce.fca.1.extract = extractvalue [2 x i64] %Str.coerce, 1 @@ -130,16 +344,7 @@ _ZNK4llvm9StringRef6substrEmm.exit: %8 = ptrtoint ptr %add.ptr.i to i64 br label %while.cond.outer -; CHECK-LABEL: @_Z3fn1N4llvm9StringRefE ; Unecessary ISEL (all the registers are the same) is always removed -; CHECK-GEN-ISEL-TRUE-NOT: iseleq [[SAME:r[0-9]+]], [[SAME]], [[SAME]] -; CHECK-GEN-ISEL-TRUE: iseleq [[SAME:r[0-9]+]], {{r[0-9]+}}, [[SAME]] -; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]] -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: # in Loop: Header -; CHECK-NEXT: addi {{r[0-9]+}}, {{r[0-9]+}}, 0 -; CHECK-NEXT: [[SUCCESSOR]] } diff --git a/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll b/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll index 
8da7519fa6dc7..5425996032e38 100644 --- a/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll +++ b/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll @@ -15,8 +15,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; After that we have: ; updated: 416B %vreg18 = ISEL8 %vreg5, %vreg5, %vreg15; -; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE -; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel +; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -mattr=+isel < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE +; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -mattr=-isel < %s | FileCheck %s --implicit-check-not isel %"struct.pov::ot_block_struct" = type { ptr, [3 x double], [3 x double], float, float, float, float, float, float, float, float, float, [3 x float], float, float, [3 x double], i16 } %"struct.pov::ot_node_struct" = type { %"struct.pov::ot_id_struct", ptr, [8 x ptr] } %"struct.pov::ot_id_struct" = type { i32, i32, i32, i32 } diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-1.mir b/llvm/test/CodeGen/PowerPC/expand-isel-1.mir deleted file mode 100644 index 35e5398070528..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-1.mir +++ /dev/null @@ -1,57 +0,0 @@ -# This file tests the scenario: ISEL R0, ZERO, R0, CR -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... ---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$x3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x0, $x3 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r0 = ISEL $zero, $r0, $cr0gt - ; CHECK-LABEL: testExpandISEL - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK-NEXT: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r0 = ADDI $zero, 0 - - $x3 = EXTSW_32_64 $r0 - -... - diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-10.mir b/llvm/test/CodeGen/PowerPC/expand-isel-10.mir deleted file mode 100644 index 6d51246336c22..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-10.mir +++ /dev/null @@ -1,54 +0,0 @@ -# This file tests the scenario: ISEL RX, RX, RX, CR (X != 0), -# which is redudant and removed. -# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... 
---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x3 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r3 = ISEL $r3, $r3, $cr0gt - $x3 = EXTSW_32_64 $r3 - ; CHECK: $r5 = ADDI $r3, 1 - ; CHECK: $cr0 = CMPWI $r3, 0 - ; CHECK-NOT: $r3 = ISEL $r3, $r3, $cr0gt - ; CHECK: $x3 = EXTSW_32_64 $r3 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-2.mir b/llvm/test/CodeGen/PowerPC/expand-isel-2.mir deleted file mode 100644 index a4265e07f81eb..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-2.mir +++ /dev/null @@ -1,57 +0,0 @@ -# This file tests the scenario: ISEL RX, ZERO, RY, CR (X != 0 && Y != 0) -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... ---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$x3' } - - { reg: '$x4' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x0, $x3, $x4 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r3 = ISEL $zero, $r4, $cr0gt - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: %[[FALSE:bb.[0-9]+]] - ; CHECK: $r3 = ORI $r4, 0 - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r3 = ADDI $zero, 0 - - $x3 = EXTSW_32_64 $r3 -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-3.mir b/llvm/test/CodeGen/PowerPC/expand-isel-3.mir deleted file mode 100644 index 28273602f91e6..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-3.mir +++ /dev/null @@ -1,58 +0,0 @@ -# This file tests the scenario: ISEL RX, RY, R0, CR (X != 0 && Y != 0) -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... 
---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$x3' } - - { reg: '$x4' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x0, $x3, $x4 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r3 = ISEL $r4, $r0, $cr0gt - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: %[[FALSE:bb.[0-9]+]] - ; CHECK: $r3 = ORI $r0, 0 - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r3 = ADDI $r4, 0 - - $x3 = EXTSW_32_64 $r3 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-4.mir b/llvm/test/CodeGen/PowerPC/expand-isel-4.mir deleted file mode 100644 index d4484f6d527c0..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-4.mir +++ /dev/null @@ -1,59 +0,0 @@ -# This file tests the scenario: ISEL R0, ZERO, RX, CR (X != 0) -# It also tests redundant liveins ($x7) and killed registers. -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... ---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$x3' } - - { reg: '$x7' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x0, $x3, $x7 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r0 = ISEL killed $zero, killed $r5, killed $cr0gt, implicit killed $cr0 - ; CHECK: BC killed $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: %[[FALSE:bb.[0-9]+]] - ; CHECK: $r0 = ORI killed $r5, 0 - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r0 = ADDI killed $zero, 0 - - $x0 = EXTSW_32_64 killed $r0 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-5.mir b/llvm/test/CodeGen/PowerPC/expand-isel-5.mir deleted file mode 100644 index 4142ef0fe89e4..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-5.mir +++ /dev/null @@ -1,54 +0,0 @@ -# This file tests the scenario: ISEL R0, RX, R0, CR (X != 0) -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... 
---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$x3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x0, $x3 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r0 = ISEL $r5, $r0, $cr0gt - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r0 = ADDI $r5, 0 - $x3 = EXTSW_32_64 $r0 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-6.mir b/llvm/test/CodeGen/PowerPC/expand-isel-6.mir deleted file mode 100644 index 9ab511e695931..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-6.mir +++ /dev/null @@ -1,57 +0,0 @@ -# This file tests the scenario when ISEL is the last instruction of the last -# Basic Block, i.e., the BB cannot fall through to its successor situation. -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... ---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$x3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x0, $x3 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r3 = ISEL $zero, $r0, $cr0gt - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: %[[FALSE:bb.[0-9]+]] - ; CHECK: $r3 = ORI $r0, 0 - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r3 = ADDI $zero, 0 - - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-7.mir b/llvm/test/CodeGen/PowerPC/expand-isel-7.mir deleted file mode 100644 index 64c2624700005..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-7.mir +++ /dev/null @@ -1,58 +0,0 @@ -# This file tests the scenario: ISEL RX, RY, RZ, CR (X != 0 && Y != 0, Z != 0) -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... 
---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x3' } - - { reg: '$x4' } - - { reg: '$x5' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x3, $x4, $x5 - - $r4 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r5 = ISEL $r3, $r4, $cr0gt - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: %[[FALSE:bb.[0-9]+]] - ; CHECK: $r5 = ORI $r4, 0 - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r5 = ADDI $r3, 0 - - $x5 = EXTSW_32_64 $r5 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-8.mir b/llvm/test/CodeGen/PowerPC/expand-isel-8.mir deleted file mode 100644 index 1799676afee71..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-8.mir +++ /dev/null @@ -1,65 +0,0 @@ -# This file tests combining three consecutive ISELs scenario. -# RUN: llc -ppc-gen-isel=false -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... ---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x3' } - - { reg: '$x4' } - - { reg: '$x5' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x3, $x4, $x5 - - $r4 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r5 = ISEL $r3, $r4, $cr0gt - $r3 = ISEL $r4, $r5, $cr0gt - $r4 = ISEL $r3, $r5, $cr0gt - ; CHECK: BC $cr0gt, %[[TRUE:bb.[0-9]+]] - ; CHECK: %[[FALSE:bb.[0-9]+]] - ; CHECK: $r5 = ORI $r4, 0 - ; CHECK: $r3 = ORI $r5, 0 - ; CHECK: $r4 = ORI $r5, 0 - ; CHECK: B %[[SUCCESSOR:bb.[0-9]+]] - ; CHECK: [[TRUE]] - ; CHECK: $r5 = ADDI $r3, 0 - ; CHECK: $r3 = ADDI $r4, 0 - ; CHECK: $r4 = ADDI $r3, 0 - - $x5 = EXTSW_32_64 $r5 - $x3 = EXTSW_32_64 $r3 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-9.mir b/llvm/test/CodeGen/PowerPC/expand-isel-9.mir deleted file mode 100644 index 2f0cdca8496b0..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-9.mir +++ /dev/null @@ -1,54 +0,0 @@ -# This file tests the scenario: ISEL RX, RY, RY, CR (X != 0 && Y != 0) -# It is folded into a copy (%RX = OR %RY, %RY) -# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s - ---- | - target datalayout = "E-m:e-i64:64-n32:64" - target triple = "powerpc64-unknown-linux-gnu" - define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { - entry: - %cmp = icmp sgt i32 %i, 0 - %add = add nsw i32 %i, 1 - %cond = select i1 %cmp, i32 %add, i32 %j - ret i32 %cond - } - -... 
---- -name: testExpandISEL -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '$x3' } - - { reg: '$x4' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: $x3, $x4 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r3 = ISEL $r4, $r4, $cr0gt - ; Test fold ISEL to a copy - ; CHECK: $r3 = OR $r4, $r4 - - $x3 = EXTSW_32_64 $r3 - -... diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir b/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir deleted file mode 100644 index 262e71d48fc09..0000000000000 --- a/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir +++ /dev/null @@ -1,80 +0,0 @@ -# RUN: llc -mtriple powerpc64-unknown-linux-gnu -run-pass=ppc-expand-isel -o \ -# RUN: - %s -verify-machineinstrs | FileCheck %s - ---- -name: expand_isel_liveness1 -tracksRegLiveness: true -registers: [] -liveins: - - { reg: '$x3', virtual-reg: '' } - - { reg: '$x4', virtual-reg: '' } - - { reg: '$x5', virtual-reg: '' } - - { reg: '$x6', virtual-reg: '' } -body: | - bb.0: - liveins: $x3, $x4, $x5, $x6 - - renamable $x8 = MULLD renamable $x5, renamable $x4 - renamable $cr5 = CMPDI renamable $x3, 0 - dead renamable $x9 = MULHDU_rec renamable $x3, renamable $x6, implicit-def $cr0 - renamable $x3 = MULLD killed renamable $x3, renamable $x6 - $cr1 = MCRF killed $cr0 - renamable $x3 = ADD8 killed renamable $x3, killed renamable $x8 - renamable $cr0 = CMPDI renamable $x5, 0 - renamable $cr5lt = CRNOR killed renamable $cr0eq, killed renamable $cr5eq, implicit $cr5, implicit $cr0 - renamable $cr0 = CMPLDI renamable $x3, 0 - renamable $x8 = MULHDU renamable $x4, renamable $x6 - renamable $x3 = ADD8 renamable $x8, killed renamable $x3 - renamable $cr6 = CMPLD renamable $x3, killed renamable $x8 - renamable $cr5gt = CRANDC killed renamable $cr6lt, killed renamable $cr0eq, implicit $cr0, implicit $cr6 - renamable $cr5lt = CRORC killed renamable $cr5lt, killed renamable $cr1eq, implicit $cr1 - renamable $x7 = LI8 1 - dead renamable $x5 = MULHDU_rec killed renamable $x5, renamable $x4, implicit-def $cr0 - renamable $cr5lt = CRORC killed renamable $cr5lt, killed renamable $cr0eq, implicit $cr0 - renamable $cr5lt = CRNOR killed renamable $cr5lt, killed renamable $cr5gt - renamable $x4 = MULLD killed renamable $x4, killed renamable $x6 - renamable $x5 = ISEL8 $zero8, killed renamable $x7, killed renamable $cr5lt - BLR8 implicit $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5 - - ; CHECK-LABEL: name: expand_isel_liveness1 - ; CHECK: bb.1: - ; CHECK: liveins: $x3, $x4, $x7 - ; CHECK: renamable $x5 = ORI8 killed renamable $x7, 0 - ; CHECK: B %bb.3 - ; CHECK: bb.2: - ; CHECK: liveins: $x3, $x4 - ; CHECK: renamable $x5 = ADDI8 $zero8, 0 - ; CHECK: bb.3: - ; CHECK: liveins: $x3, $x4, $x5 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5 -... 
- ---- -name: expand_isel_liveness2 -tracksRegLiveness: true -liveins: - - { reg: '$r0' } - - { reg: '$r3' } -body: | - bb.0.entry: - liveins: $r0, $r3 - - $r5 = ADDI $r3, 1 - $cr0 = CMPWI $r3, 0 - $r3 = ISEL $zero, killed $r0, killed $cr0gt - - ; CHECK-LABEL: name: expand_isel_liveness2 - ; CHECK: bb.0.entry: - ; CHECK: liveins: $r0, $r3 - ; CHECK: $r5 = ADDI $r3, 1 - ; CHECK: $cr0 = CMPWI $r3, 0 - ; CHECK: BC killed $cr0gt, %bb.2 - ; CHECK: bb.1.entry: - ; CHECK: liveins: $r0 - ; CHECK: $r3 = ORI killed $r0, 0 - ; CHECK: B %bb.3 - ; CHECK: bb.2.entry: - ; CHECK-NOT: liveins: $zero - ; CHECK: $r3 = ADDI $zero, 0 -... - diff --git a/llvm/test/CodeGen/PowerPC/expand-isel.ll b/llvm/test/CodeGen/PowerPC/expand-isel.ll index cf403d6db14da..16e18b595da14 100644 --- a/llvm/test/CodeGen/PowerPC/expand-isel.ll +++ b/llvm/test/CodeGen/PowerPC/expand-isel.ll @@ -1,150 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel +; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -mattr=-isel < %s | FileCheck %s --implicit-check-not isel define signext i32 @testExpandISELToIfElse(i32 signext %i, i32 signext %j) { +; CHECK-LABEL: testExpandISELToIfElse: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, 0 +; CHECK-NEXT: ble cr0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addi r4, r3, 1 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %i, 0 %add = add nsw i32 %i, 1 %cond = select i1 %cmp, i32 %add, i32 %j ret i32 %cond -; CHECK-LABEL: @testExpandISELToIfElse -; CHECK: addi r5, r3, 1 -; CHECK-NEXT: cmpwi r3, 0 -; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] -; CHECK: ori r3, r4, 0 -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r3, r5, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: extsw r3, r3 -; CHECK-NEXT: blr } - define signext i32 @testExpandISELToIf(i32 signext %i, i32 signext %j) { +; CHECK-LABEL: testExpandISELToIf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, 0 +; CHECK-NEXT: bgt cr0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %i, 0 %cond = select i1 %cmp, i32 %j, i32 %i ret i32 %cond -; CHECK-LABEL: @testExpandISELToIf -; CHECK: cmpwi r3, 0 -; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] -; CHECK-NEXT: blr -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r3, r4, 0 -; CHECK-NEXT: blr } define signext i32 @testExpandISELToElse(i32 signext %i, i32 signext %j) { +; CHECK-LABEL: testExpandISELToElse: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, 0 +; CHECK-NEXT: bgtlr cr0 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %i, 0 %cond = select i1 %cmp, i32 %i, i32 %j ret i32 %cond -; CHECK-LABEL: @testExpandISELToElse -; CHECK: cmpwi r3, 0 -; CHECK-NEXT: bclr 12, gt, 0 -; CHECK: ori r3, r4, 0 -; CHECK-NEXT: blr } - define signext i32 @testExpandISELToNull(i32 signext %i, i32 signext %j) { +; CHECK-LABEL: testExpandISELToNull: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %i, 0 %cond = 
select i1 %cmp, i32 %i, i32 %i ret i32 %cond -; CHECK-LABEL: @testExpandISELToNull -; CHECK-NOT: b {{.LBB[0-9]+}} -; CHECK-NOT: bc -; CHECK: blr } -define signext i32 @testExpandISELsTo2ORIs2ADDIs - (i32 signext %a, i32 signext %b, i32 signext %d, - i32 signext %f, i32 signext %g) { +define signext i32 @testExpandISELsTo2ORIs2ADDIs(i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { +; CHECK-LABEL: testExpandISELsTo2ORIs2ADDIs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r7, 0 +; CHECK-NEXT: bgt cr0, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: .LBB4_2: # %entry +; CHECK-NEXT: bgt cr0, .LBB4_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: mr r5, r6 +; CHECK-NEXT: .LBB4_4: # %entry +; CHECK-NEXT: add r3, r7, r5 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr entry: - %cmp = icmp sgt i32 %g, 0 %a.b = select i1 %cmp, i32 %g, i32 %b %d.f = select i1 %cmp, i32 %d, i32 %f %add = add nsw i32 %a.b, %d.f ret i32 %add - -; CHECK-LABEL: @testExpandISELsTo2ORIs2ADDIs -; CHECK: cmpwi r7, 0 -; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] -; CHECK: ori r3, r4, 0 -; CHECK-NEXT: ori r4, r6, 0 -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r3, r7, 0 -; CHECK-NEXT: addi r4, r5, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: extsw r3, r3 -; CHECK-NEXT: blr } -define signext i32 @testExpandISELsTo2ORIs1ADDI - (i32 signext %a, i32 signext %b, i32 signext %d, - i32 signext %f, i32 signext %g) { +define signext i32 @testExpandISELsTo2ORIs1ADDI(i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { +; CHECK-LABEL: testExpandISELsTo2ORIs1ADDI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r7, 0 +; CHECK-NEXT: bgt cr0, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: .LBB5_2: # %entry +; CHECK-NEXT: bgt cr0, .LBB5_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: mr r5, r6 +; CHECK-NEXT: .LBB5_4: # %entry +; CHECK-NEXT: add r3, r3, r5 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %g, 0 %a.b = select i1 %cmp, i32 %a, i32 %b %d.f = select i1 %cmp, i32 %d, i32 %f %add = add nsw i32 %a.b, %d.f ret i32 %add - -; CHECK-LABEL: @testExpandISELsTo2ORIs1ADDI -; CHECK: cmpwi r7, 0 -; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] -; CHECK: ori r3, r4, 0 -; CHECK-NEXT: ori r4, r6, 0 -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r4, r5, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: extsw r3, r3 -; CHECK-NEXT: blr } -define signext i32 @testExpandISELsTo1ORI1ADDI - (i32 signext %a, i32 signext %b, i32 signext %d, - i32 signext %f, i32 signext %g) { +define signext i32 @testExpandISELsTo1ORI1ADDI(i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { +; CHECK-LABEL: testExpandISELsTo1ORI1ADDI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r7, 0 +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: bgt cr0, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: .LBB6_2: # %entry +; CHECK-NEXT: bgt cr0, .LBB6_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: mr r5, r6 +; CHECK-NEXT: .LBB6_4: # %entry +; CHECK-NEXT: add r4, r7, r5 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr entry: - %cmp = icmp sgt i32 %g, 0 %a.b = select i1 %cmp, i32 %a, i32 %b %d.f = select i1 %cmp, i32 %d, i32 %f %add1 = add nsw i32 %a.b, %d.f %add2 = add nsw i32 %a, %add1 ret i32 %add2 - -; 
CHECK-LABEL: @testExpandISELsTo1ORI1ADDI -; CHECK: cmpwi r7, 0 -; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] -; CHECK: ori r5, r6, 0 -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r4, r3, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: add r4, r4, r5 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: extsw r3, r3 -; CHECK-NEXT: blr } -define signext i32 @testExpandISELsTo0ORI2ADDIs - (i32 signext %a, i32 signext %b, i32 signext %d, - i32 signext %f, i32 signext %g) { +define signext i32 @testExpandISELsTo0ORI2ADDIs(i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { +; CHECK-LABEL: testExpandISELsTo0ORI2ADDIs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r7, 0 +; CHECK-NEXT: mr r7, r3 +; CHECK-NEXT: bgt cr0, .LBB7_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: .LBB7_2: # %entry +; CHECK-NEXT: mr r4, r5 +; CHECK-NEXT: bgt cr0, .LBB7_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: mr r4, r6 +; CHECK-NEXT: .LBB7_4: # %entry +; CHECK-NEXT: add r4, r7, r4 +; CHECK-NEXT: add r3, r3, r4 +; CHECK-NEXT: sub r3, r3, r5 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr entry: - %cmp = icmp sgt i32 %g, 0 %a.b = select i1 %cmp, i32 %a, i32 %b %d.f = select i1 %cmp, i32 %d, i32 %f @@ -152,27 +163,30 @@ entry: %add2 = add nsw i32 %a, %add1 %sub1 = sub nsw i32 %add2, %d ret i32 %sub1 - -; CHECK-LABEL: @testExpandISELsTo0ORI2ADDIs -; CHECK: cmpwi r7, 0 -; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi r4, r3, 0 -; CHECK-NEXT: addi r6, r5, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: add r4, r4, r6 -; CHECK-NEXT: add r3, r3, r4 -; CHECK-NEXT: sub r3, r3, r5 -; CHECK-NEXT: extsw r3, r3 -; CHECK-NEXT: blr } - @b = local_unnamed_addr global i32 0, align 4 @a = local_unnamed_addr global i32 0, align 4 ; Function Attrs: norecurse nounwind readonly define signext i32 @testComplexISEL() #0 { +; CHECK-LABEL: testComplexISEL: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: lwz r4, 0(r3) +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: cmplwi r4, 0 +; CHECK-NEXT: bnelr cr0 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-NEXT: addis r4, r2, .LC2@toc@ha +; CHECK-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-NEXT: ld r4, .LC2@toc@l(r4) +; CHECK-NEXT: lwa r3, 0(r3) +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: cntlzd r3, r3 +; CHECK-NEXT: rldicl r3, r3, 58, 63 +; CHECK-NEXT: blr entry: %0 = load i32, ptr @b, align 4, !tbaa !1 %tobool = icmp eq i32 %0, 0 @@ -190,13 +204,6 @@ cleanup: %retval.0 = phi i32 [ %conv3, %if.end ], [ 1, %entry ] ret i32 %retval.0 -; CHECK-LABEL: @testComplexISEL -; CHECK: li r3, 1 -; CHECK: cmplwi r4, 0 -; CHECK: bnelr cr0 -; CHECK: xor [[XOR:r[0-9]+]] -; CHECK: cntlzd [[CZ:r[0-9]+]], [[XOR]] -; CHECK: rldicl [[SH:r[0-9]+]], [[CZ]], 58, 63 } !1 = !{!2, !2, i64 0} diff --git a/llvm/test/CodeGen/PowerPC/fold-zero.ll b/llvm/test/CodeGen/PowerPC/fold-zero.ll index 6262d24040a3e..a071464ac6410 100644 --- a/llvm/test/CodeGen/PowerPC/fold-zero.ll +++ b/llvm/test/CodeGen/PowerPC/fold-zero.ll @@ -1,40 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-crbits | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck --check-prefix=CHECK-CRB %s 
-; RUN: llc -verify-machineinstrs -ppc-gen-isel=false < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mattr=-isel < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" define i32 @test1(i1 %a, i32 %c) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: andi. 3, 3, 1 +; CHECK-NEXT: iseleq 3, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-CRB-LABEL: test1: +; CHECK-CRB: # %bb.0: +; CHECK-CRB-NEXT: andi. 3, 3, 1 +; CHECK-CRB-NEXT: li 3, 0 +; CHECK-CRB-NEXT: iselgt 3, 4, 3 +; CHECK-CRB-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: test1: +; CHECK-NO-ISEL: # %bb.0: +; CHECK-NO-ISEL-NEXT: andi. 3, 3, 1 +; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB0_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: +; CHECK-NO-ISEL-NEXT: li 4, 0 +; CHECK-NO-ISEL-NEXT: .LBB0_2: +; CHECK-NO-ISEL-NEXT: mr 3, 4 +; CHECK-NO-ISEL-NEXT: blr %x = select i1 %a, i32 %c, i32 0 ret i32 %x -; CHECK-LABEL: @test1 -; CHECK-NOT: li {{[0-9]+}}, 0 -; CHECK: iseleq 3, 0, -; CHECK: blr -; CHECK-NO-ISEL-LABEL: @test1 -; CHECK-NO-ISEL: li 3, 0 -; CHECK-NO-ISEL-NEXT: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 -; CHECK-NO-ISEL-NEXT: blr } define i32 @test2(i1 %a, i32 %c) nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: andi. 3, 3, 1 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: blr +; +; CHECK-CRB-LABEL: test2: +; CHECK-CRB: # %bb.0: +; CHECK-CRB-NEXT: andi. 3, 3, 1 +; CHECK-CRB-NEXT: iselgt 3, 0, 4 +; CHECK-CRB-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: test2: +; CHECK-NO-ISEL: # %bb.0: +; CHECK-NO-ISEL-NEXT: andi. 
3, 3, 1 +; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 1, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: +; CHECK-NO-ISEL-NEXT: mr 3, 4 +; CHECK-NO-ISEL-NEXT: blr %x = select i1 %a, i32 0, i32 %c ret i32 %x -; CHECK-CRB-LABEL: @test2 -; CHECK-CRB-NOT: li {{[0-9]+}}, 0 -; CHECK-CRB: iselgt 3, 0, -; CHECK-CRB: blr -; CHECK-NO-ISEL-LABEL: @test2 -; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: li 3, 0 -; CHECK-NO-ISEL-NEXT: blr } diff --git a/llvm/test/CodeGen/PowerPC/i1-ext-fold.ll b/llvm/test/CodeGen/PowerPC/i1-ext-fold.ll index 0a666860cbd76..a1be8d39994d5 100644 --- a/llvm/test/CodeGen/PowerPC/i1-ext-fold.ll +++ b/llvm/test/CodeGen/PowerPC/i1-ext-fold.ll @@ -1,32 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: nounwind readnone define signext i32 @foo(i32 signext %a, i32 signext %b) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 3, 4 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 16 +; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: foo: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: li 3, 16 +; CHECK-NO-ISEL-NEXT: bclr 12, 0, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: blr entry: %cmp = icmp slt i32 %a, %b %conv = zext i1 %cmp to i32 %shl = shl nuw nsw i32 %conv, 4 ret i32 %shl -; CHECK-LABEL: @foo -; CHECK-NO-ISEL-LABEL: @foo -; CHECK-DAG: cmpw -; CHECK-DAG: li [[REG1:[0-9]+]], 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 16 -; CHECK: isellt 3, [[REG2]], [[REG1]] -; CHECK: blr -; CHECK-NO-ISEL: bc 12, 0, -; CHECK-NO-ISEL: blr -; CHECK-NO-ISEL: addi 3, 4, 0 -; CHECK-NO-ISEL-NEXT: blr } ; Function Attrs: nounwind readnone define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 { +; CHECK-LABEL: foo2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 3, 4 +; CHECK-NEXT: li 3, 5 +; CHECK-NEXT: li 4, 21 +; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: foo2: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: li 3, 21 +; CHECK-NO-ISEL-NEXT: bclr 12, 0, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 5 +; CHECK-NO-ISEL-NEXT: blr entry: %cmp = icmp slt i32 %a, %b %conv = zext i1 %cmp to i32 @@ -34,40 +56,33 @@ entry: %add1 = or i32 %shl, 5 ret i32 %add1 -; CHECK-LABEL: @foo2 -; CHECK-NO-ISEL-LABEL: @foo2 -; CHECK-DAG: cmpw -; CHECK-DAG: li [[REG1:[0-9]+]], 5 -; CHECK-DAG: li [[REG2:[0-9]+]], 21 -; CHECK: isellt 3, [[REG2]], [[REG1]] -; CHECK: blr -; CHECK-NO-ISEL: bc 12, 0, -; CHECK-NO-ISEL: blr -; CHECK-NO-ISEL: addi 3, 4, 0 -; CHECK-NO-ISEL-NEXT: blr } ; Function Attrs: nounwind readnone define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 { +; CHECK-LABEL: foo3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 3, 4 +; CHECK-NEXT: li 3, 16 +; CHECK-NEXT: iselgt 3, 0, 3 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: foo3: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; 
CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: bclr 12, 1, 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 16 +; CHECK-NO-ISEL-NEXT: blr entry: %cmp = icmp sle i32 %a, %b %conv = zext i1 %cmp to i32 %shl = shl nuw nsw i32 %conv, 4 ret i32 %shl -; CHECK-LABEL: @foo3 -; CHECK-NO-ISEL-LABEL: @foo3 -; CHECK-DAG: cmpw -; CHECK-DAG: li [[REG1:[0-9]+]], 16 -; CHECK: iselgt 3, 0, [[REG1]] -; CHECK: blr -; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: li 3, 0 -; CHECK-NO-ISEL-NEXT: blr } attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll index 340d9aff8f85b..f7df003fcc3f8 100644 --- a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll @@ -1,37 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; Verify that we get the code sequence needed to avoid double-rounding. +; Note that only parts of the sequence are checked for here, to allow +; for minor code generation differences. ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -ppc-gen-isel=false < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -mattr=-isel < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL +; Also check that with -enable-unsafe-fp-math we do not get that extra +; code sequence. Simply verify that there is no "isel" present. +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE +; CHECK-UNSAFE-NOT: isel target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" define float @test(i64 %x) nounwind readnone { -entry: - %conv = sitofp i64 %x to float - ret float %conv -} - ; Verify that we get the code sequence needed to avoid double-rounding. ; Note that only parts of the sequence are checked for here, to allow ; for minor code generation differences. - -;CHECK-LABEL: test -;CHECK-NO-ISEL-LABEL: test -; CHECK: sradi [[REG1:[0-9]+]], 3, 53 -; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1 -; CHECK: cmpldi [[REG2]], 1 -; CHECK: iselgt [[REG3:[0-9]+]], {{[0-9]+}}, 3 -; CHECK-NO-ISEL: rldicr [[REG2:[0-9]+]], {{[0-9]+}}, 0, 52 -; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL-NEXT: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi {{[0-9]+}}, [[REG2]], 0 -; CHECK-NO-ISEL-NEXT: [[SUCCESSOR]] -; CHECK-NO-ISEL: std {{[0-9]+}}, -{{[0-9]+}}(1) -; CHECK: std [[REG3]], -{{[0-9]+}}(1) - - ; Also check that with -enable-unsafe-fp-math we do not get that extra ; code sequence. Simply verify that there is no "isel" present. 
- ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE -; CHECK-UNSAFE-NOT: isel +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrldi 4, 3, 53 +; CHECK-NEXT: sradi 5, 3, 53 +; CHECK-NEXT: addi 4, 4, 2047 +; CHECK-NEXT: addi 5, 5, 1 +; CHECK-NEXT: or 4, 4, 3 +; CHECK-NEXT: cmpldi 5, 1 +; CHECK-NEXT: rldicr 4, 4, 0, 52 +; CHECK-NEXT: iselgt 3, 4, 3 +; CHECK-NEXT: std 3, -8(1) +; CHECK-NEXT: lfd 0, -8(1) +; CHECK-NEXT: xscvsxddp 0, 0 +; CHECK-NEXT: frsp 1, 0 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: test: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: sradi 4, 3, 53 +; CHECK-NO-ISEL-NEXT: addi 4, 4, 1 +; CHECK-NO-ISEL-NEXT: cmpldi 4, 1 +; CHECK-NO-ISEL-NEXT: bc 4, 1, .LBB0_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: +; CHECK-NO-ISEL-NEXT: clrldi 4, 3, 53 +; CHECK-NO-ISEL-NEXT: addi 4, 4, 2047 +; CHECK-NO-ISEL-NEXT: or 3, 4, 3 +; CHECK-NO-ISEL-NEXT: rldicr 3, 3, 0, 52 +; CHECK-NO-ISEL-NEXT: .LBB0_2: # %entry +; CHECK-NO-ISEL-NEXT: std 3, -8(1) +; CHECK-NO-ISEL-NEXT: lfd 0, -8(1) +; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0 +; CHECK-NO-ISEL-NEXT: frsp 1, 0 +; CHECK-NO-ISEL-NEXT: blr +; +; CHECK-UNSAFE-LABEL: test: +; CHECK-UNSAFE: # %bb.0: # %entry +; CHECK-UNSAFE-NEXT: std 3, -8(1) +; CHECK-UNSAFE-NEXT: lfd 0, -8(1) +; CHECK-UNSAFE-NEXT: xscvsxddp 0, 0 +; CHECK-UNSAFE-NEXT: frsp 1, 0 +; CHECK-UNSAFE-NEXT: blr + +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + diff --git a/llvm/test/CodeGen/PowerPC/ifcvt.ll b/llvm/test/CodeGen/PowerPC/ifcvt.ll index f04deb37a5755..6b9d872f4aad7 100644 --- a/llvm/test/CodeGen/PowerPC/ifcvt.ll +++ b/llvm/test/CodeGen/PowerPC/ifcvt.ll @@ -1,9 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs -ppc-gen-isel=false | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs -mattr=-isel | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slwi 5, 6, 16 +; CHECK-NEXT: extsh 6, 6 +; CHECK-NEXT: cmpwi 5, -1 +; CHECK-NEXT: add 5, 6, 3 +; CHECK-NEXT: clrlwi 6, 6, 17 +; CHECK-NEXT: sub 6, 3, 6 +; CHECK-NEXT: sub 3, 4, 3 +; CHECK-NEXT: iselgt 5, 5, 6 +; CHECK-NEXT: extsh 5, 5 +; CHECK-NEXT: add 3, 3, 5 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: test: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: slwi 7, 6, 16 +; CHECK-NO-ISEL-NEXT: extsh 5, 6 +; CHECK-NO-ISEL-NEXT: cmpwi 7, -1 +; CHECK-NO-ISEL-NEXT: ble 0, .LBB0_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %cond.false +; CHECK-NO-ISEL-NEXT: add 5, 5, 3 +; CHECK-NO-ISEL-NEXT: b .LBB0_3 +; CHECK-NO-ISEL-NEXT: .LBB0_2: # %cond.true +; CHECK-NO-ISEL-NEXT: clrlwi 5, 5, 17 +; CHECK-NO-ISEL-NEXT: sub 5, 3, 5 +; CHECK-NO-ISEL-NEXT: .LBB0_3: # %cond.end +; CHECK-NO-ISEL-NEXT: extsh 5, 5 +; CHECK-NO-ISEL-NEXT: sub 3, 4, 3 +; CHECK-NO-ISEL-NEXT: add 3, 3, 5 +; CHECK-NO-ISEL-NEXT: blr entry: %sext82 = shl i32 %d, 16 %conv29 = ashr exact i32 %sext82, 16 @@ -19,18 +51,6 @@ cond.false: ; preds = %sw.epilog %add37 = add nsw i32 
%conv29, %a br label %cond.end -; CHECK-LABEL: @test -; CHECK-NO-ISEL-LABEL: @test -; CHECK: add [[REG:[0-9]+]], -; CHECK: sub [[REG2:[0-9]+]], -; CHECK: iselgt {{[0-9]+}}, [[REG]], [[REG2]] -; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 5, 6, 0 -; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL: extsh 5, 5 -; CHECK-NO-ISEL-NEXT: add 3, 3, 5 -; CHECK-NO-ISEL-NEXT: blr cond.end: ; preds = %cond.false, %cond.true %cond = phi i32 [ %sub34, %cond.true ], [ %add37, %cond.false ] diff --git a/llvm/test/CodeGen/PowerPC/isel.ll b/llvm/test/CodeGen/PowerPC/isel.ll index c1cceb9670180..5f64df5dfeb91 100644 --- a/llvm/test/CodeGen/PowerPC/isel.ll +++ b/llvm/test/CodeGen/PowerPC/isel.ll @@ -1,38 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s define i64 @test1(i64 %a, i64 %b, i64 %c, i64 %d) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: isellt 3, 6, 5 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: test1: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmpld 3, 4 +; CHECK-NO-ISEL-NEXT: bge 0, .LBB0_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 5, 6 +; CHECK-NO-ISEL-NEXT: .LBB0_2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 5 +; CHECK-NO-ISEL-NEXT: blr entry: %p = icmp uge i64 %a, %b %x = select i1 %p, i64 %c, i64 %d ret i64 %x -; CHECK-LABEL: @test1 -; CHECK-NO-ISEL-LABEL: @test1 -; CHECK: isel -; CHECK-NO-ISEL: bc 12, 0, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 5, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 6, 0 -; CHECK-NO-ISEL-NEXT: blr } define i32 @test2(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmplw 3, 4 +; CHECK-NEXT: isellt 3, 6, 5 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: test2: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmplw 3, 4 +; CHECK-NO-ISEL-NEXT: bge 0, .LBB1_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 5, 6 +; CHECK-NO-ISEL-NEXT: .LBB1_2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 5 +; CHECK-NO-ISEL-NEXT: blr entry: %p = icmp uge i32 %a, %b %x = select i1 %p, i32 %c, i32 %d ret i32 %x -; CHECK-LABEL: @test2 -; CHECK-NO-ISEL-LABEL: @test2 -; CHECK: isel -; CHECK-NO-ISEL: bc 12, 0, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 5, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 6, 0 -; CHECK-NO-ISEL-NEXT: blr } diff --git a/llvm/test/CodeGen/PowerPC/optcmp.ll b/llvm/test/CodeGen/PowerPC/optcmp.ll index bc265c646d471..831bc97cc0e9f 100644 --- a/llvm/test/CodeGen/PowerPC/optcmp.ll +++ b/llvm/test/CodeGen/PowerPC/optcmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -mattr=-crbits -disable-ppc-cmp-opt=0 | FileCheck %s -; RUN: llc -ppc-gpr-icmps=all 
-verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -mattr=-crbits -disable-ppc-cmp-opt=0 -ppc-gen-isel=false | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -mattr=-crbits -disable-ppc-cmp-opt=0 -mattr=-isel | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -16,14 +16,12 @@ define signext i32 @foo(i32 signext %a, i32 signext %b, ptr nocapture %c) #0 { ; ; CHECK-NO-ISEL-LABEL: foo: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: cmpw 3, 4 ; CHECK-NO-ISEL-NEXT: sub 6, 3, 4 -; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB0_2 -; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: b .LBB0_2 -; CHECK-NO-ISEL-NEXT: .LBB0_2: # %entry +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 ; CHECK-NO-ISEL-NEXT: stw 6, 0(5) +; CHECK-NO-ISEL-NEXT: bgtlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i32 %a, %b @@ -46,14 +44,14 @@ define signext i32 @foo2(i32 signext %a, i32 signext %b, ptr nocapture %c) #0 { ; ; CHECK-NO-ISEL-LABEL: foo2: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: slw 4, 3, 4 -; CHECK-NO-ISEL-NEXT: li 6, 0 +; CHECK-NO-ISEL-NEXT: mr 6, 3 ; CHECK-NO-ISEL-NEXT: li 3, 1 +; CHECK-NO-ISEL-NEXT: slw 4, 6, 4 ; CHECK-NO-ISEL-NEXT: cmpwi 4, 0 ; CHECK-NO-ISEL-NEXT: stw 4, 0(5) -; CHECK-NO-ISEL-NEXT: bclr 12, 1, 0 +; CHECK-NO-ISEL-NEXT: bgtlr 0 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 6, 0 +; CHECK-NO-ISEL-NEXT: li 3, 0 ; CHECK-NO-ISEL-NEXT: blr entry: %shl = shl i32 %a, %b @@ -74,12 +72,10 @@ define i64 @fool(i64 %a, i64 %b, ptr nocapture %c) #0 { ; CHECK-NO-ISEL-LABEL: fool: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: sub. 6, 3, 4 -; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB2_2 -; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: b .LBB2_2 -; CHECK-NO-ISEL-NEXT: .LBB2_2: # %entry ; CHECK-NO-ISEL-NEXT: std 6, 0(5) +; CHECK-NO-ISEL-NEXT: bgtlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i64 %a, %b @@ -100,12 +96,10 @@ define i64 @foolb(i64 %a, i64 %b, ptr nocapture %c) #0 { ; CHECK-NO-ISEL-LABEL: foolb: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: sub. 6, 3, 4 -; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB3_1 -; CHECK-NO-ISEL-NEXT: b .LBB3_2 -; CHECK-NO-ISEL-NEXT: .LBB3_1: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 4, 0 -; CHECK-NO-ISEL-NEXT: .LBB3_2: # %entry ; CHECK-NO-ISEL-NEXT: std 6, 0(5) +; CHECK-NO-ISEL-NEXT: blelr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i64 %a, %b @@ -126,12 +120,10 @@ define i64 @foolc(i64 %a, i64 %b, ptr nocapture %c) #0 { ; CHECK-NO-ISEL-LABEL: foolc: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: sub. 
6, 4, 3 -; CHECK-NO-ISEL-NEXT: bc 12, 0, .LBB4_2 -; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: b .LBB4_2 -; CHECK-NO-ISEL-NEXT: .LBB4_2: # %entry ; CHECK-NO-ISEL-NEXT: std 6, 0(5) +; CHECK-NO-ISEL-NEXT: bltlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i64 %b, %a @@ -152,12 +144,10 @@ define i64 @foold(i64 %a, i64 %b, ptr nocapture %c) #0 { ; CHECK-NO-ISEL-LABEL: foold: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: sub. 6, 4, 3 -; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB5_2 -; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: b .LBB5_2 -; CHECK-NO-ISEL-NEXT: .LBB5_2: # %entry ; CHECK-NO-ISEL-NEXT: std 6, 0(5) +; CHECK-NO-ISEL-NEXT: bgtlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i64 %b, %a @@ -178,12 +168,10 @@ define i64 @foold2(i64 %a, i64 %b, ptr nocapture %c) #0 { ; CHECK-NO-ISEL-LABEL: foold2: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: sub. 6, 3, 4 -; CHECK-NO-ISEL-NEXT: bc 12, 0, .LBB6_2 -; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: b .LBB6_2 -; CHECK-NO-ISEL-NEXT: .LBB6_2: # %entry ; CHECK-NO-ISEL-NEXT: std 6, 0(5) +; CHECK-NO-ISEL-NEXT: bltlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i64 %a, %b @@ -336,12 +324,10 @@ define signext i64 @fooct(i64 signext %a, i64 signext %b, ptr nocapture %c) #0 { ; CHECK-NO-ISEL-NEXT: and 6, 6, 7 ; CHECK-NO-ISEL-NEXT: mulld 6, 6, 9 ; CHECK-NO-ISEL-NEXT: rldicl. 6, 6, 8, 56 -; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB10_2 -; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 -; CHECK-NO-ISEL-NEXT: b .LBB10_2 -; CHECK-NO-ISEL-NEXT: .LBB10_2: # %entry ; CHECK-NO-ISEL-NEXT: std 6, 0(5) +; CHECK-NO-ISEL-NEXT: bgtlr 0 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 4 ; CHECK-NO-ISEL-NEXT: blr entry: %sub = sub nsw i64 %a, %b diff --git a/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll b/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll index 7e2515ff70938..cde5870db3940 100644 --- a/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll +++ b/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll @@ -1,10 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: nounwind define void @foo(ptr nocapture %r1, ptr nocapture %r2, ptr nocapture %r3, ptr nocapture %r4, i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) #0 { +; Make sure that we don't schedule all of the isels together, they should be +; intermixed with the adds because each isel starts a new dispatch group. 
+; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmplwi 7, 0 +; CHECK-NEXT: addi 7, 8, 1 +; CHECK-NEXT: iseleq 9, 9, 8 +; CHECK-NEXT: stw 9, 0(3) +; CHECK-NEXT: addi 3, 10, -2 +; CHECK-NEXT: iseleq 9, 10, 8 +; CHECK-NEXT: iseleq 3, 3, 7 +; CHECK-NEXT: stw 9, 0(4) +; CHECK-NEXT: addi 4, 10, -5 +; CHECK-NEXT: stw 3, 0(5) +; CHECK-NEXT: addi 3, 8, 3 +; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: stw 3, 0(6) +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: foo: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmplwi 7, 0 +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: bne 0, .LBB0_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 9 +; CHECK-NO-ISEL-NEXT: .LBB0_2: # %entry +; CHECK-NO-ISEL-NEXT: stw 7, 0(3) +; CHECK-NO-ISEL-NEXT: mr 3, 8 +; CHECK-NO-ISEL-NEXT: bne 0, .LBB0_4 +; CHECK-NO-ISEL-NEXT: # %bb.3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 10 +; CHECK-NO-ISEL-NEXT: .LBB0_4: # %entry +; CHECK-NO-ISEL-NEXT: stw 3, 0(4) +; CHECK-NO-ISEL-NEXT: bne 0, .LBB0_7 +; CHECK-NO-ISEL-NEXT: # %bb.5: # %entry +; CHECK-NO-ISEL-NEXT: addi 3, 10, -2 +; CHECK-NO-ISEL-NEXT: stw 3, 0(5) +; CHECK-NO-ISEL-NEXT: beq 0, .LBB0_8 +; CHECK-NO-ISEL-NEXT: .LBB0_6: +; CHECK-NO-ISEL-NEXT: addi 3, 8, 3 +; CHECK-NO-ISEL-NEXT: stw 3, 0(6) +; CHECK-NO-ISEL-NEXT: blr +; CHECK-NO-ISEL-NEXT: .LBB0_7: +; CHECK-NO-ISEL-NEXT: addi 3, 8, 1 +; CHECK-NO-ISEL-NEXT: stw 3, 0(5) +; CHECK-NO-ISEL-NEXT: bne 0, .LBB0_6 +; CHECK-NO-ISEL-NEXT: .LBB0_8: # %entry +; CHECK-NO-ISEL-NEXT: addi 3, 10, -5 +; CHECK-NO-ISEL-NEXT: stw 3, 0(6) +; CHECK-NO-ISEL-NEXT: blr entry: %tobool = icmp ne i32 %a, 0 %cond = select i1 %tobool, i32 %b, i32 %c @@ -22,21 +74,4 @@ entry: ret void } -; Make sure that we don't schedule all of the isels together, they should be -; intermixed with the adds because each isel starts a new dispatch group. 
-; CHECK-LABEL: @foo -; CHECK-NO-ISEL-LABEL: @foo -; CHECK: isel -; CHECK-NO-ISEL: bc 12, 2, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL: addi {{[0-9]+}}, {{[0-9]+}}, -2 -; CHECK: addi -; CHECK: isel -; CHECK-NO-ISEL: bc 12, 2, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 7, 0 -; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL: [[TRUE]] -; CHECK: blr - attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll index 3d8dcbd00d01a..49a5687afe4c8 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll @@ -1,10 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr7 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: nounwind readnone define signext i32 @crbitsoff(i32 signext %v1, i32 signext %v2) #0 { +; CHECK-LABEL: crbitsoff: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cntlzw 4, 4 +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: iseleq 3, 0, 3 +; CHECK-NEXT: rlwinm 4, 4, 27, 5, 31 +; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: crbitsoff: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cmplwi 3, 0 +; CHECK-NO-ISEL-NEXT: li 3, 1 +; CHECK-NO-ISEL-NEXT: bne 0, .LBB0_2 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry +; CHECK-NO-ISEL-NEXT: li 3, 0 +; CHECK-NO-ISEL-NEXT: .LBB0_2: # %entry +; CHECK-NO-ISEL-NEXT: cntlzw 4, 4 +; CHECK-NO-ISEL-NEXT: rlwinm 4, 4, 27, 5, 31 +; CHECK-NO-ISEL-NEXT: and 3, 3, 4 +; CHECK-NO-ISEL-NEXT: blr entry: %tobool = icmp ne i32 %v1, 0 %lnot = icmp eq i32 %v2, 0 @@ -12,21 +35,28 @@ entry: %and = zext i1 %and3 to i32 ret i32 %and -; CHECK-LABEL: @crbitsoff -; CHECK-NO-ISEL-LABEL: @crbitsoff -; CHECK-DAG: cmplwi 3, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: cntlzw [[REG3:[0-9]+]], -; CHECK: iseleq [[REG4:[0-9]+]], 0, [[REG2]] -; CHECK-NO-ISEL: bc 12, 2, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: li 3, 0 -; CHECK: and 3, [[REG4]], [[REG3]] -; CHECK: blr } define signext i32 @crbitson(i32 signext %v1, i32 signext %v2) #1 { +; CHECK-LABEL: crbitson: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: cntlzw 4, 4 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: srwi 4, 4, 5 +; CHECK-NEXT: xori 3, 3, 1 +; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: blr +; +; CHECK-NO-ISEL-LABEL: crbitson: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: cntlzw 3, 3 +; CHECK-NO-ISEL-NEXT: cntlzw 4, 4 +; CHECK-NO-ISEL-NEXT: srwi 3, 3, 5 +; CHECK-NO-ISEL-NEXT: srwi 4, 4, 5 +; CHECK-NO-ISEL-NEXT: xori 3, 3, 1 +; CHECK-NO-ISEL-NEXT: and 3, 3, 4 +; CHECK-NO-ISEL-NEXT: blr entry: %tobool = icmp ne i32 %v1, 0 %lnot = icmp eq i32 %v2, 0 @@ -34,15 +64,6 @@ entry: %and = zext i1 %and3 to i32 ret i32 %and -; CHECK-LABEL: @crbitson -; CHECK-NO-ISEL-LABEL: @crbitson -; CHECK-DAG: cntlzw [[REG1:[0-9]+]], 3 -; CHECK-DAG: cntlzw [[REG2:[0-9]+]], 4 -; CHECK: srwi [[REG3:[0-9]+]], [[REG1]], 5 -; CHECK: srwi 
[[REG4:[0-9]+]], [[REG2]], 5 -; CHECK: xori [[REG5:[0-9]+]], [[REG3]], 1 -; CHECK: and 3, [[REG5]], [[REG4]] -; CHECK-NEXT: blr } diff --git a/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir b/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir index 28faace491173..f5b931e1e4238 100644 --- a/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir +++ b/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir @@ -1,5 +1,5 @@ # RUN: llc -mtriple=powerpc64le-unknown-unknown -start-after=ppc-mi-peepholes \ -# RUN: -stop-before=ppc-expand-isel -verify-machineinstrs %s -o - | FileCheck %s +# RUN: -stop-after=ppc-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s --- | ; ModuleID = 'a.ll' source_filename = "a.c" diff --git a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll index d5e77a5cda067..ebf4cbcaac94f 100644 --- a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \ -; RUN: -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -21,14 +21,16 @@ define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32slt: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB0_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB0_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB0_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB0_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -51,14 +53,16 @@ define signext i32 @testi32ult(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32ult: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB1_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB1_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB1_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB1_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB1_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -81,14 +85,14 @@ define signext i32 @testi32sle(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32sle: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB2_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB2_3 ; CHECK-NO-ISEL-NEXT: 
# %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB2_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB2_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB2_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -111,14 +115,14 @@ define signext i32 @testi32ule(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32ule: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB3_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB3_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB3_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB3_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB3_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -145,10 +149,9 @@ define signext i32 @testi32eq(i32 signext %c1, i32 signext %c2, i32 signext %c3, ; CHECK-NO-ISEL-NEXT: creqv 20, 6, 2 ; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB4_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr +; CHECK-NO-ISEL-NEXT: mr 7, 8 ; CHECK-NO-ISEL-NEXT: .LBB4_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -171,14 +174,14 @@ define signext i32 @testi32sge(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32sge: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB5_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB5_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB5_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB5_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB5_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -201,14 +204,14 @@ define signext i32 @testi32uge(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32uge: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB6_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB6_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB6_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB6_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB6_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -231,14 +234,16 @@ define signext i32 @testi32sgt(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32sgt: ; CHECK-NO-ISEL: # %bb.0: # %entry ; 
CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB7_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB7_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB7_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB7_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB7_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -261,14 +266,16 @@ define signext i32 @testi32ugt(i32 signext %c1, i32 signext %c2, i32 signext %c3 ; CHECK-NO-ISEL-LABEL: testi32ugt: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpw 5, 6 -; CHECK-NO-ISEL-NEXT: cmpw 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB8_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB8_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpw 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB8_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB8_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB8_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -295,10 +302,9 @@ define signext i32 @testi32ne(i32 signext %c1, i32 signext %c2, i32 signext %c3, ; CHECK-NO-ISEL-NEXT: crxor 20, 6, 2 ; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB9_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr +; CHECK-NO-ISEL-NEXT: mr 7, 8 ; CHECK-NO-ISEL-NEXT: .LBB9_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 @@ -321,14 +327,16 @@ define i64 @testi64slt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64slt: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB10_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB10_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB10_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB10_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB10_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -351,14 +359,16 @@ define i64 @testi64ult(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64ult: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB11_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB11_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB11_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB11_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 
+; CHECK-NO-ISEL-NEXT: .LBB11_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -381,14 +391,14 @@ define i64 @testi64sle(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64sle: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB12_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB12_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB12_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB12_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB12_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -411,14 +421,14 @@ define i64 @testi64ule(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64ule: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB13_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB13_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB13_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB13_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB13_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -445,10 +455,9 @@ define i64 @testi64eq(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { ; CHECK-NO-ISEL-NEXT: creqv 20, 6, 2 ; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB14_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr +; CHECK-NO-ISEL-NEXT: mr 7, 8 ; CHECK-NO-ISEL-NEXT: .LBB14_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -471,14 +480,14 @@ define i64 @testi64sge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64sge: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB15_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB15_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB15_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB15_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB15_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -501,14 +510,14 @@ define i64 @testi64uge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64uge: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crorc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB16_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB16_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 
8, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB16_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB16_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: .LBB16_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -531,14 +540,16 @@ define i64 @testi64sgt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64sgt: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 2, 6 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB17_2 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB17_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB17_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB17_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB17_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -561,14 +572,16 @@ define i64 @testi64ugt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 ; CHECK-NO-ISEL-LABEL: testi64ugt: ; CHECK-NO-ISEL: # %bb.0: # %entry ; CHECK-NO-ISEL-NEXT: cmpd 5, 6 -; CHECK-NO-ISEL-NEXT: cmpd 1, 3, 4 -; CHECK-NO-ISEL-NEXT: crandc 20, 6, 2 -; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB18_2 +; CHECK-NO-ISEL-NEXT: bc 12, 2, .LBB18_3 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 +; CHECK-NO-ISEL-NEXT: cmpd 3, 4 +; CHECK-NO-ISEL-NEXT: bc 4, 2, .LBB18_3 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %entry +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL-NEXT: .LBB18_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: .LBB18_3: # %entry +; CHECK-NO-ISEL-NEXT: mr 7, 8 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 @@ -595,10 +608,9 @@ define i64 @testi64ne(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { ; CHECK-NO-ISEL-NEXT: crxor 20, 6, 2 ; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB19_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry -; CHECK-NO-ISEL-NEXT: ori 3, 8, 0 -; CHECK-NO-ISEL-NEXT: blr +; CHECK-NO-ISEL-NEXT: mr 7, 8 ; CHECK-NO-ISEL-NEXT: .LBB19_2: # %entry -; CHECK-NO-ISEL-NEXT: addi 3, 7, 0 +; CHECK-NO-ISEL-NEXT: mr 3, 7 ; CHECK-NO-ISEL-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll b/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll index f952c0e2d8fba..f696745c9d414 100644 --- a/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll +++ b/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll @@ -1,10 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gep-opt=0 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false -ppc-gep-opt=0 < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-isel -ppc-gep-opt=0 < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: nounwind define void @jbd2_journal_commit_transaction(i32 %input1, ptr %input2, ptr %input3, ptr %input4) #0 { +; CHECK-LABEL: jbd2_journal_commit_transaction: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi 7, 3, 1 +; CHECK-NEXT: cmplwi 1, 3, 0 +; CHECK-NEXT: li 8, -5 +; CHECK-NEXT: lis 9, 4 +; CHECK-NEXT: cmpld 6, 4, 5 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %while.body392 +; CHECK-NEXT: # +; CHECK-NEXT: bne- 1, .LBB0_4 +; CHECK-NEXT: # %bb.2: # %wait_on_buffer.exit1319 +; CHECK-NEXT: # +; CHECK-NEXT: ld 4, 0(6) +; CHECK-NEXT: mr 5, 4 +; CHECK-NEXT: ldu 10, -72(5) +; CHECK-NEXT: andi. 10, 10, 1 +; CHECK-NEXT: crmove 20, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .long 2101356712 +; CHECK-NEXT: andc 10, 10, 9 +; CHECK-NEXT: stdcx. 10, 0, 5 +; CHECK-NEXT: bne- 0, .Ltmp0 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: std 4, 0(6) +; CHECK-NEXT: bne+ 6, .LBB0_1 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: isel 7, 3, 8, 20 +; CHECK-NEXT: .LBB0_4: # %while.end418 +; CHECK-NEXT: cmplwi 7, 0 +; CHECK-NEXT: beq 0, .LBB0_6 +; CHECK-NEXT: # %bb.5: # %if.then420 +; CHECK-NEXT: .LBB0_6: # %if.end421 +; +; CHECK-NO-ISEL-LABEL: jbd2_journal_commit_transaction: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: addi 7, 3, 1 +; CHECK-NO-ISEL-NEXT: cmplwi 1, 3, 0 +; CHECK-NO-ISEL-NEXT: lis 8, 4 +; CHECK-NO-ISEL-NEXT: cmpld 5, 4, 5 +; CHECK-NO-ISEL-NEXT: b .LBB0_2 +; CHECK-NO-ISEL-NEXT: .p2align 4 +; CHECK-NO-ISEL-NEXT: .LBB0_1: # %wait_on_buffer.exit1319 +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: #APP +; CHECK-NO-ISEL-NEXT: .Ltmp0: +; CHECK-NO-ISEL-NEXT: .long 2101364904 +; CHECK-NO-ISEL-NEXT: andc 10, 10, 8 +; CHECK-NO-ISEL-NEXT: stdcx. 10, 0, 9 +; CHECK-NO-ISEL-NEXT: bne- 0, .Ltmp0 +; CHECK-NO-ISEL-EMPTY: +; CHECK-NO-ISEL-NEXT: #NO_APP +; CHECK-NO-ISEL-NEXT: std 5, 0(6) +; CHECK-NO-ISEL-NEXT: beq- 5, .LBB0_6 +; CHECK-NO-ISEL-NEXT: .LBB0_2: # %while.body392 +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: bne- 1, .LBB0_5 +; CHECK-NO-ISEL-NEXT: # %bb.3: # %wait_on_buffer.exit1319 +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: ld 5, 0(6) +; CHECK-NO-ISEL-NEXT: mr 9, 5 +; CHECK-NO-ISEL-NEXT: ldu 4, -72(9) +; CHECK-NO-ISEL-NEXT: andi. 4, 4, 1 +; CHECK-NO-ISEL-NEXT: mr 4, 3 +; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB0_1 +; CHECK-NO-ISEL-NEXT: # %bb.4: # %wait_on_buffer.exit1319 +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: li 4, -5 +; CHECK-NO-ISEL-NEXT: b .LBB0_1 +; CHECK-NO-ISEL-NEXT: .LBB0_5: +; CHECK-NO-ISEL-NEXT: mr 4, 7 +; CHECK-NO-ISEL-NEXT: .LBB0_6: # %while.end418 +; CHECK-NO-ISEL-NEXT: cmplwi 4, 0 +; CHECK-NO-ISEL-NEXT: beq 0, .LBB0_8 +; CHECK-NO-ISEL-NEXT: # %bb.7: # %if.then420 +; CHECK-NO-ISEL-NEXT: .LBB0_8: # %if.end421 entry: br label %while.body392 @@ -30,17 +109,6 @@ while.end418: ; preds = %wait_on_buffer.exit %tobool419 = icmp eq i32 %err.4.lcssa, 0 br i1 %tobool419, label %if.end421, label %if.then420 -; CHECK-LABEL: @jbd2_journal_commit_transaction -; CHECK-NO-ISEL-LABEL: @jbd2_journal_commit_transaction -; CHECK: andi. -; CHECK: crmove [[REG:[0-9]+]], 1 -; CHECK: stdcx. 
-; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]] -; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 7, 8, 0 -; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL: addi 7, 3, 0 if.then420: ; preds = %while.end418 unreachable diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra.ll b/llvm/test/CodeGen/PowerPC/subreg-postra.ll index 32a1b85cac8f5..a315da545ba0f 100644 --- a/llvm/test/CodeGen/PowerPC/subreg-postra.ll +++ b/llvm/test/CodeGen/PowerPC/subreg-postra.ll @@ -1,10 +1,232 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-isel < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: nounwind define void @jbd2_journal_commit_transaction(ptr %journal, i64 %inp1, i32 %inp2, +; CHECK-LABEL: jbd2_journal_commit_transaction: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfcr 12 +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 12, 8(1) +; CHECK-NEXT: stdu 1, -176(1) +; CHECK-NEXT: lbz 6, 295(1) +; CHECK-NEXT: std 0, 192(1) +; CHECK-NEXT: andi. 6, 6, 1 +; CHECK-NEXT: std 25, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: crmove 9, 1 +; CHECK-NEXT: andi. 6, 10, 1 +; CHECK-NEXT: crmove 8, 1 +; CHECK-NEXT: andi. 
6, 9, 1 +; CHECK-NEXT: bc 4, 20, .LBB0_24 +; CHECK-NEXT: # %bb.1: # %do.body +; CHECK-NEXT: bc 4, 20, .LBB0_25 +; CHECK-NEXT: # %bb.2: # %trace_jbd2_start_commit.exit +; CHECK-NEXT: mr 30, 8 +; CHECK-NEXT: mr 29, 7 +; CHECK-NEXT: bc 12, 20, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %do.body.i1116 +; CHECK-NEXT: bc 4, 20, .LBB0_26 +; CHECK-NEXT: .LBB0_4: # %trace_jbd2_commit_locking.exit +; CHECK-NEXT: bc 4, 20, .LBB0_27 +; CHECK-NEXT: # %bb.5: # %spin_unlock.exit1146 +; CHECK-NEXT: bc 4, 20, .LBB0_28 +; CHECK-NEXT: # %bb.6: # %trace_jbd2_commit_flushing.exit +; CHECK-NEXT: bc 4, 20, .LBB0_29 +; CHECK-NEXT: # %bb.7: # %for.end.i +; CHECK-NEXT: bc 4, 20, .LBB0_31 +; CHECK-NEXT: # %bb.8: # %journal_submit_data_buffers.exit +; CHECK-NEXT: bc 4, 20, .LBB0_32 +; CHECK-NEXT: # %bb.9: # %if.end103 +; CHECK-NEXT: bc 4, 20, .LBB0_33 +; CHECK-NEXT: # %bb.10: # %trace_jbd2_commit_logging.exit +; CHECK-NEXT: bc 4, 20, .LBB0_34 +; CHECK-NEXT: # %bb.11: # %for.end.i1287 +; CHECK-NEXT: bc 4, 20, .LBB0_35 +; CHECK-NEXT: # %bb.12: # %journal_finish_inode_data_buffers.exit +; CHECK-NEXT: bc 4, 20, .LBB0_36 +; CHECK-NEXT: # %bb.13: # %if.end256 +; CHECK-NEXT: cmpdi 1, 4, 0 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_14: # %while.body318 +; CHECK-NEXT: # +; CHECK-NEXT: bc 4, 6, .LBB0_19 +; CHECK-NEXT: # %bb.15: # %wait_on_buffer.exit +; CHECK-NEXT: # +; CHECK-NEXT: bc 4, 1, .LBB0_14 +; CHECK-NEXT: # %bb.16: # %do.body378 +; CHECK-NEXT: bc 4, 8, .LBB0_20 +; CHECK-NEXT: # %bb.17: # %while.end418 +; CHECK-NEXT: bc 4, 8, .LBB0_23 +; CHECK-NEXT: .LBB0_18: # %if.end421 +; CHECK-NEXT: .LBB0_19: # %if.then.i1296 +; CHECK-NEXT: .LBB0_20: # %while.body392.lr.ph +; CHECK-NEXT: lis 26, 4 +; CHECK-NEXT: mr 27, 5 +; CHECK-NEXT: mr 28, 3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_21: # %while.body392 +; CHECK-NEXT: # +; CHECK-NEXT: ld 3, 0(3) +; CHECK-NEXT: ldu 25, -72(3) +; CHECK-NEXT: #APP +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .long 2088769704 +; CHECK-NEXT: andc 4, 4, 26 +; CHECK-NEXT: stdcx. 4, 0, 3 +; CHECK-NEXT: bne- 0, .Ltmp0 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld 3, 0(29) +; CHECK-NEXT: std 3, 0(30) +; CHECK-NEXT: bl __brelse +; CHECK-NEXT: nop +; CHECK-NEXT: bc 4, 9, .LBB0_21 +; CHECK-NEXT: # %bb.22: # %while.end418.loopexit +; CHECK-NEXT: andi. 3, 25, 1 +; CHECK-NEXT: li 3, -5 +; CHECK-NEXT: mr 5, 27 +; CHECK-NEXT: iselgt 5, 5, 3 +; CHECK-NEXT: mr 3, 28 +; CHECK-NEXT: bc 12, 8, .LBB0_18 +; CHECK-NEXT: .LBB0_23: # %if.then420 +; CHECK-NEXT: extsw 4, 5 +; CHECK-NEXT: bl jbd2_journal_abort +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_24: # %if.then5 +; CHECK-NEXT: .LBB0_25: # %do.body.i +; CHECK-NEXT: .LBB0_26: # %do.body5.i1122 +; CHECK-NEXT: .LBB0_27: # %if.then.i.i.i.i1144 +; CHECK-NEXT: .LBB0_28: # %do.body.i1159 +; CHECK-NEXT: .LBB0_29: # %for.body.lr.ph.i +; CHECK-NEXT: bc 4, 20, .LBB0_37 +; CHECK-NEXT: # %bb.30: # %spin_unlock.exit.i +; CHECK-NEXT: .LBB0_31: # %if.then.i.i.i.i31.i +; CHECK-NEXT: .LBB0_32: # %if.then102 +; CHECK-NEXT: .LBB0_33: # %do.body.i1182 +; CHECK-NEXT: .LBB0_34: # %for.body.i1277 +; CHECK-NEXT: .LBB0_35: # %if.then.i.i.i.i84.i +; CHECK-NEXT: .LBB0_36: # %if.then249 +; CHECK-NEXT: .LBB0_37: # %if.then.i.i.i.i.i +; +; CHECK-NO-ISEL-LABEL: jbd2_journal_commit_transaction: +; CHECK-NO-ISEL: # %bb.0: # %entry +; CHECK-NO-ISEL-NEXT: mfcr 12 +; CHECK-NO-ISEL-NEXT: mflr 0 +; CHECK-NO-ISEL-NEXT: stw 12, 8(1) +; CHECK-NO-ISEL-NEXT: stdu 1, -176(1) +; CHECK-NO-ISEL-NEXT: lbz 6, 295(1) +; CHECK-NO-ISEL-NEXT: std 0, 192(1) +; CHECK-NO-ISEL-NEXT: andi. 
6, 6, 1 +; CHECK-NO-ISEL-NEXT: std 25, 120(1) # 8-byte Folded Spill +; CHECK-NO-ISEL-NEXT: std 26, 128(1) # 8-byte Folded Spill +; CHECK-NO-ISEL-NEXT: std 27, 136(1) # 8-byte Folded Spill +; CHECK-NO-ISEL-NEXT: std 28, 144(1) # 8-byte Folded Spill +; CHECK-NO-ISEL-NEXT: std 29, 152(1) # 8-byte Folded Spill +; CHECK-NO-ISEL-NEXT: std 30, 160(1) # 8-byte Folded Spill +; CHECK-NO-ISEL-NEXT: crmove 9, 1 +; CHECK-NO-ISEL-NEXT: andi. 6, 10, 1 +; CHECK-NO-ISEL-NEXT: crmove 8, 1 +; CHECK-NO-ISEL-NEXT: andi. 6, 9, 1 +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_26 +; CHECK-NO-ISEL-NEXT: # %bb.1: # %do.body +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_27 +; CHECK-NO-ISEL-NEXT: # %bb.2: # %trace_jbd2_start_commit.exit +; CHECK-NO-ISEL-NEXT: mr 30, 8 +; CHECK-NO-ISEL-NEXT: mr 29, 7 +; CHECK-NO-ISEL-NEXT: bc 12, 20, .LBB0_4 +; CHECK-NO-ISEL-NEXT: # %bb.3: # %do.body.i1116 +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_28 +; CHECK-NO-ISEL-NEXT: .LBB0_4: # %trace_jbd2_commit_locking.exit +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_29 +; CHECK-NO-ISEL-NEXT: # %bb.5: # %spin_unlock.exit1146 +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_30 +; CHECK-NO-ISEL-NEXT: # %bb.6: # %trace_jbd2_commit_flushing.exit +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_31 +; CHECK-NO-ISEL-NEXT: # %bb.7: # %for.end.i +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_33 +; CHECK-NO-ISEL-NEXT: # %bb.8: # %journal_submit_data_buffers.exit +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_34 +; CHECK-NO-ISEL-NEXT: # %bb.9: # %if.end103 +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_35 +; CHECK-NO-ISEL-NEXT: # %bb.10: # %trace_jbd2_commit_logging.exit +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_36 +; CHECK-NO-ISEL-NEXT: # %bb.11: # %for.end.i1287 +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_37 +; CHECK-NO-ISEL-NEXT: # %bb.12: # %journal_finish_inode_data_buffers.exit +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_38 +; CHECK-NO-ISEL-NEXT: # %bb.13: # %if.end256 +; CHECK-NO-ISEL-NEXT: cmpdi 1, 4, 0 +; CHECK-NO-ISEL-NEXT: .p2align 4 +; CHECK-NO-ISEL-NEXT: .LBB0_14: # %while.body318 +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: bc 4, 6, .LBB0_19 +; CHECK-NO-ISEL-NEXT: # %bb.15: # %wait_on_buffer.exit +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: bc 4, 1, .LBB0_14 +; CHECK-NO-ISEL-NEXT: # %bb.16: # %do.body378 +; CHECK-NO-ISEL-NEXT: bc 4, 8, .LBB0_20 +; CHECK-NO-ISEL-NEXT: # %bb.17: # %while.end418 +; CHECK-NO-ISEL-NEXT: bc 4, 8, .LBB0_25 +; CHECK-NO-ISEL-NEXT: .LBB0_18: # %if.end421 +; CHECK-NO-ISEL-NEXT: .LBB0_19: # %if.then.i1296 +; CHECK-NO-ISEL-NEXT: .LBB0_20: # %while.body392.lr.ph +; CHECK-NO-ISEL-NEXT: lis 26, 4 +; CHECK-NO-ISEL-NEXT: mr 27, 5 +; CHECK-NO-ISEL-NEXT: mr 28, 3 +; CHECK-NO-ISEL-NEXT: .p2align 4 +; CHECK-NO-ISEL-NEXT: .LBB0_21: # %while.body392 +; CHECK-NO-ISEL-NEXT: # +; CHECK-NO-ISEL-NEXT: ld 3, 0(3) +; CHECK-NO-ISEL-NEXT: ldu 25, -72(3) +; CHECK-NO-ISEL-NEXT: #APP +; CHECK-NO-ISEL-NEXT: .Ltmp0: +; CHECK-NO-ISEL-NEXT: .long 2088769704 +; CHECK-NO-ISEL-NEXT: andc 4, 4, 26 +; CHECK-NO-ISEL-NEXT: stdcx. 4, 0, 3 +; CHECK-NO-ISEL-NEXT: bne- 0, .Ltmp0 +; CHECK-NO-ISEL-EMPTY: +; CHECK-NO-ISEL-NEXT: #NO_APP +; CHECK-NO-ISEL-NEXT: ld 3, 0(29) +; CHECK-NO-ISEL-NEXT: std 3, 0(30) +; CHECK-NO-ISEL-NEXT: bl __brelse +; CHECK-NO-ISEL-NEXT: nop +; CHECK-NO-ISEL-NEXT: bc 4, 9, .LBB0_21 +; CHECK-NO-ISEL-NEXT: # %bb.22: # %while.end418.loopexit +; CHECK-NO-ISEL-NEXT: andi. 
3, 25, 1 +; CHECK-NO-ISEL-NEXT: mr 5, 27 +; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB0_24 +; CHECK-NO-ISEL-NEXT: # %bb.23: # %while.end418.loopexit +; CHECK-NO-ISEL-NEXT: li 5, -5 +; CHECK-NO-ISEL-NEXT: .LBB0_24: # %while.end418.loopexit +; CHECK-NO-ISEL-NEXT: mr 3, 28 +; CHECK-NO-ISEL-NEXT: bc 12, 8, .LBB0_18 +; CHECK-NO-ISEL-NEXT: .LBB0_25: # %if.then420 +; CHECK-NO-ISEL-NEXT: extsw 4, 5 +; CHECK-NO-ISEL-NEXT: bl jbd2_journal_abort +; CHECK-NO-ISEL-NEXT: nop +; CHECK-NO-ISEL-NEXT: .LBB0_26: # %if.then5 +; CHECK-NO-ISEL-NEXT: .LBB0_27: # %do.body.i +; CHECK-NO-ISEL-NEXT: .LBB0_28: # %do.body5.i1122 +; CHECK-NO-ISEL-NEXT: .LBB0_29: # %if.then.i.i.i.i1144 +; CHECK-NO-ISEL-NEXT: .LBB0_30: # %do.body.i1159 +; CHECK-NO-ISEL-NEXT: .LBB0_31: # %for.body.lr.ph.i +; CHECK-NO-ISEL-NEXT: bc 4, 20, .LBB0_39 +; CHECK-NO-ISEL-NEXT: # %bb.32: # %spin_unlock.exit.i +; CHECK-NO-ISEL-NEXT: .LBB0_33: # %if.then.i.i.i.i31.i +; CHECK-NO-ISEL-NEXT: .LBB0_34: # %if.then102 +; CHECK-NO-ISEL-NEXT: .LBB0_35: # %do.body.i1182 +; CHECK-NO-ISEL-NEXT: .LBB0_36: # %for.body.i1277 +; CHECK-NO-ISEL-NEXT: .LBB0_37: # %if.then.i.i.i.i84.i +; CHECK-NO-ISEL-NEXT: .LBB0_38: # %if.then249 +; CHECK-NO-ISEL-NEXT: .LBB0_39: # %if.then.i.i.i.i.i ptr %inp3, ptr %inp4, ptr %inp5, i1 %inp6, i1 %inp7, i1 %inp8) #0 { @@ -144,15 +366,6 @@ wait_on_buffer.exit1319: ; preds = %while.body392 call void @__brelse(ptr %3) #1 br i1 %inp8, label %while.end418, label %while.body392 -; CHECK-LABEL: @jbd2_journal_commit_transaction -; CHECK-NO-ISEL-LABEL: @jbd2_journal_commit_transaction -; CHECK: andi. -; CHECK: crmove -; CHECK: stdcx. -; CHECK: iselgt {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 5, 3, 0 -; CHECK-NO-ISEL: b [[SUCCESSOR:.LBB[0-9]+]] while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378 diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 1dbb060fc35fa..2e7ca02531470 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -260,13 +260,13 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 112(sp) +; ZHINX32-NEXT: lh t0, 124(sp) ; ZHINX32-NEXT: sw t0, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 116(sp) -; ZHINX32-NEXT: sw t0, 52(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 120(sp) +; ZHINX32-NEXT: sw t0, 52(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: lh t0, 116(sp) ; ZHINX32-NEXT: sw t0, 48(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 124(sp) +; ZHINX32-NEXT: lh t0, 112(sp) ; ZHINX32-NEXT: sw t0, 44(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t6, 128(sp) ; ZHINX32-NEXT: lh t5, 132(sp) @@ -308,10 +308,10 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: sh t4, 4(sp) ; ZHINX32-NEXT: sh t5, 2(sp) ; ZHINX32-NEXT: sh t6, 0(sp) -; ZHINX32-NEXT: lw t3, 56(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t4, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t5, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t6, 44(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw t3, 44(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw t5, 52(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw t6, 56(sp) # 4-byte Folded Reload ; ZHINX32-NEXT: call callee_half_32 ; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte 
Folded Reload ; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload @@ -345,13 +345,13 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: sd s9, 88(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: sd s10, 80(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: sd s11, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 176(sp) +; ZHINX64-NEXT: lh t0, 200(sp) ; ZHINX64-NEXT: sd t0, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 184(sp) -; ZHINX64-NEXT: sd t0, 56(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lh t0, 192(sp) +; ZHINX64-NEXT: sd t0, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: lh t0, 184(sp) ; ZHINX64-NEXT: sd t0, 48(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 200(sp) +; ZHINX64-NEXT: lh t0, 176(sp) ; ZHINX64-NEXT: sd t0, 40(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lh t6, 208(sp) ; ZHINX64-NEXT: lh t5, 216(sp) @@ -393,10 +393,10 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: sh t4, 4(sp) ; ZHINX64-NEXT: sh t5, 2(sp) ; ZHINX64-NEXT: sh t6, 0(sp) -; ZHINX64-NEXT: ld t3, 64(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 40(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t3, 40(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t5, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t6, 64(sp) # 8-byte Folded Reload ; ZHINX64-NEXT: call callee_half_32 ; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload ; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload @@ -832,32 +832,28 @@ define fastcc float @callee_float_32(<32 x float> %A) nounwind { define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-LABEL: caller_float_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -160 -; ZHINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: addi sp, sp, -144 +; ZHINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lw t0, 160(sp) -; ZHINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lw t0, 164(sp) -; ZHINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lw t0, 168(sp) -; ZHINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lw t0, 172(sp) 
-; ZHINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lw t6, 176(sp) -; ZHINX32-NEXT: lw t5, 180(sp) -; ZHINX32-NEXT: lw t4, 184(sp) -; ZHINX32-NEXT: lw s0, 188(sp) +; ZHINX32-NEXT: lw t1, 164(sp) +; ZHINX32-NEXT: lw t2, 168(sp) +; ZHINX32-NEXT: lw s0, 172(sp) +; ZHINX32-NEXT: lw t3, 176(sp) +; ZHINX32-NEXT: lw t4, 180(sp) +; ZHINX32-NEXT: lw t5, 184(sp) +; ZHINX32-NEXT: lw t6, 188(sp) ; ZHINX32-NEXT: lw s1, 192(sp) ; ZHINX32-NEXT: lw s2, 196(sp) ; ZHINX32-NEXT: lw s3, 200(sp) @@ -870,49 +866,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-NEXT: lw s10, 228(sp) ; ZHINX32-NEXT: lw s11, 232(sp) ; ZHINX32-NEXT: lw ra, 236(sp) -; ZHINX32-NEXT: lw t3, 240(sp) -; ZHINX32-NEXT: lw t2, 244(sp) -; ZHINX32-NEXT: lw t1, 248(sp) -; ZHINX32-NEXT: lw t0, 252(sp) -; ZHINX32-NEXT: sw t0, 76(sp) -; ZHINX32-NEXT: sw t1, 72(sp) -; ZHINX32-NEXT: sw t2, 68(sp) -; ZHINX32-NEXT: sw t3, 64(sp) -; ZHINX32-NEXT: sw ra, 60(sp) -; ZHINX32-NEXT: sw s11, 56(sp) -; ZHINX32-NEXT: sw s10, 52(sp) -; ZHINX32-NEXT: sw s9, 48(sp) -; ZHINX32-NEXT: sw s8, 44(sp) -; ZHINX32-NEXT: sw s7, 40(sp) -; ZHINX32-NEXT: sw s6, 36(sp) -; ZHINX32-NEXT: sw s5, 32(sp) -; ZHINX32-NEXT: sw s4, 28(sp) -; ZHINX32-NEXT: sw s3, 24(sp) -; ZHINX32-NEXT: sw s2, 20(sp) -; ZHINX32-NEXT: sw s1, 16(sp) +; ZHINX32-NEXT: sw ra, 76(sp) +; ZHINX32-NEXT: sw s11, 72(sp) +; ZHINX32-NEXT: sw s10, 68(sp) +; ZHINX32-NEXT: sw s9, 64(sp) +; ZHINX32-NEXT: sw s8, 60(sp) +; ZHINX32-NEXT: sw s7, 56(sp) +; ZHINX32-NEXT: sw s6, 52(sp) +; ZHINX32-NEXT: sw s5, 48(sp) +; ZHINX32-NEXT: sw s4, 44(sp) +; ZHINX32-NEXT: sw s3, 40(sp) +; ZHINX32-NEXT: sw s2, 36(sp) +; ZHINX32-NEXT: sw s1, 32(sp) +; ZHINX32-NEXT: sw t6, 28(sp) +; ZHINX32-NEXT: sw t5, 24(sp) +; ZHINX32-NEXT: sw t4, 20(sp) +; ZHINX32-NEXT: sw t3, 16(sp) +; ZHINX32-NEXT: lw t3, 144(sp) +; ZHINX32-NEXT: lw t4, 148(sp) +; ZHINX32-NEXT: lw t5, 152(sp) +; ZHINX32-NEXT: lw t6, 156(sp) ; ZHINX32-NEXT: sw s0, 12(sp) -; ZHINX32-NEXT: sw t4, 8(sp) -; ZHINX32-NEXT: sw t5, 4(sp) -; ZHINX32-NEXT: sw t6, 0(sp) -; ZHINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: sw t2, 8(sp) +; ZHINX32-NEXT: sw t1, 4(sp) +; ZHINX32-NEXT: sw t0, 0(sp) ; ZHINX32-NEXT: call callee_float_32 -; ZHINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 160 +; ZHINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 
112(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 144 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_float_32: @@ -931,13 +923,13 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 224(sp) +; ZHINX64-NEXT: lw t0, 248(sp) ; ZHINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 232(sp) -; ZHINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lw t0, 240(sp) +; ZHINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: lw t0, 232(sp) ; ZHINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 248(sp) +; ZHINX64-NEXT: lw t0, 224(sp) ; ZHINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lw t6, 256(sp) ; ZHINX64-NEXT: lw t5, 264(sp) @@ -979,10 +971,10 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: sw t4, 8(sp) ; ZHINX64-NEXT: sw t5, 4(sp) ; ZHINX64-NEXT: sw t6, 0(sp) -; ZHINX64-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload ; ZHINX64-NEXT: call callee_float_32 ; ZHINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload ; ZHINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload @@ -1002,32 +994,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZFINX32-LABEL: caller_float_32: ; ZFINX32: # %bb.0: -; ZFINX32-NEXT: addi sp, sp, -160 -; ZFINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: addi sp, sp, -144 +; ZFINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; ZFINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill ; 
ZFINX32-NEXT: lw t0, 160(sp) -; ZFINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw t0, 164(sp) -; ZFINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw t0, 168(sp) -; ZFINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw t0, 172(sp) -; ZFINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill -; ZFINX32-NEXT: lw t6, 176(sp) -; ZFINX32-NEXT: lw t5, 180(sp) -; ZFINX32-NEXT: lw t4, 184(sp) -; ZFINX32-NEXT: lw s0, 188(sp) +; ZFINX32-NEXT: lw t1, 164(sp) +; ZFINX32-NEXT: lw t2, 168(sp) +; ZFINX32-NEXT: lw s0, 172(sp) +; ZFINX32-NEXT: lw t3, 176(sp) +; ZFINX32-NEXT: lw t4, 180(sp) +; ZFINX32-NEXT: lw t5, 184(sp) +; ZFINX32-NEXT: lw t6, 188(sp) ; ZFINX32-NEXT: lw s1, 192(sp) ; ZFINX32-NEXT: lw s2, 196(sp) ; ZFINX32-NEXT: lw s3, 200(sp) @@ -1040,49 +1028,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX32-NEXT: lw s10, 228(sp) ; ZFINX32-NEXT: lw s11, 232(sp) ; ZFINX32-NEXT: lw ra, 236(sp) -; ZFINX32-NEXT: lw t3, 240(sp) -; ZFINX32-NEXT: lw t2, 244(sp) -; ZFINX32-NEXT: lw t1, 248(sp) -; ZFINX32-NEXT: lw t0, 252(sp) -; ZFINX32-NEXT: sw t0, 76(sp) -; ZFINX32-NEXT: sw t1, 72(sp) -; ZFINX32-NEXT: sw t2, 68(sp) -; ZFINX32-NEXT: sw t3, 64(sp) -; ZFINX32-NEXT: sw ra, 60(sp) -; ZFINX32-NEXT: sw s11, 56(sp) -; ZFINX32-NEXT: sw s10, 52(sp) -; ZFINX32-NEXT: sw s9, 48(sp) -; ZFINX32-NEXT: sw s8, 44(sp) -; ZFINX32-NEXT: sw s7, 40(sp) -; ZFINX32-NEXT: sw s6, 36(sp) -; ZFINX32-NEXT: sw s5, 32(sp) -; ZFINX32-NEXT: sw s4, 28(sp) -; ZFINX32-NEXT: sw s3, 24(sp) -; ZFINX32-NEXT: sw s2, 20(sp) -; ZFINX32-NEXT: sw s1, 16(sp) +; ZFINX32-NEXT: sw ra, 76(sp) +; ZFINX32-NEXT: sw s11, 72(sp) +; ZFINX32-NEXT: sw s10, 68(sp) +; ZFINX32-NEXT: sw s9, 64(sp) +; ZFINX32-NEXT: sw s8, 60(sp) +; ZFINX32-NEXT: sw s7, 56(sp) +; ZFINX32-NEXT: sw s6, 52(sp) +; ZFINX32-NEXT: sw s5, 48(sp) +; ZFINX32-NEXT: sw s4, 44(sp) +; ZFINX32-NEXT: sw s3, 40(sp) +; ZFINX32-NEXT: sw s2, 36(sp) +; ZFINX32-NEXT: sw s1, 32(sp) +; ZFINX32-NEXT: sw t6, 28(sp) +; ZFINX32-NEXT: sw t5, 24(sp) +; ZFINX32-NEXT: sw t4, 20(sp) +; ZFINX32-NEXT: sw t3, 16(sp) +; ZFINX32-NEXT: lw t3, 144(sp) +; ZFINX32-NEXT: lw t4, 148(sp) +; ZFINX32-NEXT: lw t5, 152(sp) +; ZFINX32-NEXT: lw t6, 156(sp) ; ZFINX32-NEXT: sw s0, 12(sp) -; ZFINX32-NEXT: sw t4, 8(sp) -; ZFINX32-NEXT: sw t5, 4(sp) -; ZFINX32-NEXT: sw t6, 0(sp) -; ZFINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: sw t2, 8(sp) +; ZFINX32-NEXT: sw t1, 4(sp) +; ZFINX32-NEXT: sw t0, 0(sp) ; ZFINX32-NEXT: call callee_float_32 -; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload -; ZFINX32-NEXT: addi sp, sp, 160 +; ZFINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; 
ZFINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; ZFINX32-NEXT: addi sp, sp, 144 ; ZFINX32-NEXT: ret ; ; ZFINX64-LABEL: caller_float_32: @@ -1101,13 +1085,13 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 224(sp) +; ZFINX64-NEXT: lw t0, 248(sp) ; ZFINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 232(sp) -; ZFINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: lw t0, 240(sp) +; ZFINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: lw t0, 232(sp) ; ZFINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 248(sp) +; ZFINX64-NEXT: lw t0, 224(sp) ; ZFINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: lw t6, 256(sp) ; ZFINX64-NEXT: lw t5, 264(sp) @@ -1149,10 +1133,10 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: sw t4, 8(sp) ; ZFINX64-NEXT: sw t5, 4(sp) ; ZFINX64-NEXT: sw t6, 0(sp) -; ZFINX64-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t4, 104(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t5, 96(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t6, 88(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: call callee_float_32 ; ZFINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload @@ -1172,32 +1156,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZDINX32-LABEL: caller_float_32: ; ZDINX32: # %bb.0: -; ZDINX32-NEXT: addi sp, sp, -160 -; ZDINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: addi sp, sp, -144 +; ZDINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s6, 
112(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; ZDINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill ; ZDINX32-NEXT: lw t0, 160(sp) -; ZDINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw t0, 164(sp) -; ZDINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw t0, 168(sp) -; ZDINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw t0, 172(sp) -; ZDINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill -; ZDINX32-NEXT: lw t6, 176(sp) -; ZDINX32-NEXT: lw t5, 180(sp) -; ZDINX32-NEXT: lw t4, 184(sp) -; ZDINX32-NEXT: lw s0, 188(sp) +; ZDINX32-NEXT: lw t1, 164(sp) +; ZDINX32-NEXT: lw t2, 168(sp) +; ZDINX32-NEXT: lw s0, 172(sp) +; ZDINX32-NEXT: lw t3, 176(sp) +; ZDINX32-NEXT: lw t4, 180(sp) +; ZDINX32-NEXT: lw t5, 184(sp) +; ZDINX32-NEXT: lw t6, 188(sp) ; ZDINX32-NEXT: lw s1, 192(sp) ; ZDINX32-NEXT: lw s2, 196(sp) ; ZDINX32-NEXT: lw s3, 200(sp) @@ -1210,49 +1190,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX32-NEXT: lw s10, 228(sp) ; ZDINX32-NEXT: lw s11, 232(sp) ; ZDINX32-NEXT: lw ra, 236(sp) -; ZDINX32-NEXT: lw t3, 240(sp) -; ZDINX32-NEXT: lw t2, 244(sp) -; ZDINX32-NEXT: lw t1, 248(sp) -; ZDINX32-NEXT: lw t0, 252(sp) -; ZDINX32-NEXT: sw t0, 76(sp) -; ZDINX32-NEXT: sw t1, 72(sp) -; ZDINX32-NEXT: sw t2, 68(sp) -; ZDINX32-NEXT: sw t3, 64(sp) -; ZDINX32-NEXT: sw ra, 60(sp) -; ZDINX32-NEXT: sw s11, 56(sp) -; ZDINX32-NEXT: sw s10, 52(sp) -; ZDINX32-NEXT: sw s9, 48(sp) -; ZDINX32-NEXT: sw s8, 44(sp) -; ZDINX32-NEXT: sw s7, 40(sp) -; ZDINX32-NEXT: sw s6, 36(sp) -; ZDINX32-NEXT: sw s5, 32(sp) -; ZDINX32-NEXT: sw s4, 28(sp) -; ZDINX32-NEXT: sw s3, 24(sp) -; ZDINX32-NEXT: sw s2, 20(sp) -; ZDINX32-NEXT: sw s1, 16(sp) +; ZDINX32-NEXT: sw ra, 76(sp) +; ZDINX32-NEXT: sw s11, 72(sp) +; ZDINX32-NEXT: sw s10, 68(sp) +; ZDINX32-NEXT: sw s9, 64(sp) +; ZDINX32-NEXT: sw s8, 60(sp) +; ZDINX32-NEXT: sw s7, 56(sp) +; ZDINX32-NEXT: sw s6, 52(sp) +; ZDINX32-NEXT: sw s5, 48(sp) +; ZDINX32-NEXT: sw s4, 44(sp) +; ZDINX32-NEXT: sw s3, 40(sp) +; ZDINX32-NEXT: sw s2, 36(sp) +; ZDINX32-NEXT: sw s1, 32(sp) +; ZDINX32-NEXT: sw t6, 28(sp) +; ZDINX32-NEXT: sw t5, 24(sp) +; ZDINX32-NEXT: sw t4, 20(sp) +; ZDINX32-NEXT: sw t3, 16(sp) +; ZDINX32-NEXT: lw t3, 144(sp) +; ZDINX32-NEXT: lw t4, 148(sp) +; ZDINX32-NEXT: lw t5, 152(sp) +; ZDINX32-NEXT: lw t6, 156(sp) ; ZDINX32-NEXT: sw s0, 12(sp) -; ZDINX32-NEXT: sw t4, 8(sp) -; ZDINX32-NEXT: sw t5, 4(sp) -; ZDINX32-NEXT: sw t6, 0(sp) -; ZDINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: sw t2, 8(sp) +; ZDINX32-NEXT: sw t1, 4(sp) +; ZDINX32-NEXT: sw t0, 0(sp) ; ZDINX32-NEXT: call callee_float_32 -; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s9, 
116(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload -; ZDINX32-NEXT: addi sp, sp, 160 +; ZDINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; ZDINX32-NEXT: addi sp, sp, 144 ; ZDINX32-NEXT: ret ; ; ZDINX64-LABEL: caller_float_32: @@ -1271,13 +1247,13 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 224(sp) +; ZDINX64-NEXT: lw t0, 248(sp) ; ZDINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 232(sp) -; ZDINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: lw t0, 240(sp) +; ZDINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: lw t0, 232(sp) ; ZDINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 248(sp) +; ZDINX64-NEXT: lw t0, 224(sp) ; ZDINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: lw t6, 256(sp) ; ZDINX64-NEXT: lw t5, 264(sp) @@ -1319,10 +1295,10 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: sw t4, 8(sp) ; ZDINX64-NEXT: sw t5, 4(sp) ; ZDINX64-NEXT: sw t6, 0(sp) -; ZDINX64-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t4, 104(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t5, 96(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t6, 88(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: call callee_float_32 ; ZDINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/renamable-copy.mir b/llvm/test/CodeGen/RISCV/renamable-copy.mir new file mode 100644 index 0000000000000..06f17f4edbccf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/renamable-copy.mir @@ -0,0 +1,31 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=riscv32 -simplify-mir \ +# RUN: -run-pass=postrapseudos | FileCheck --check-prefix=RV32 %s +# RUN: llc -o - %s -mtriple=riscv64 -simplify-mir \ +# RUN: -run-pass=postrapseudos | FileCheck --check-prefix=RV64 %s + +--- | + define void @foo() { + entry: + ret void + } +... 
+--- +name: foo +body: | + bb.0.entry: + liveins: $x11 + ; RV32-LABEL: name: foo + ; RV32: liveins: $x11 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: $x10 = ADDI renamable $x11, 0 + ; RV32-NEXT: PseudoRET implicit $x10 + ; + ; RV64-LABEL: name: foo + ; RV64: liveins: $x11 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: $x10 = ADDI renamable $x11, 0 + ; RV64-NEXT: PseudoRET implicit $x10 + renamable $x10 = COPY renamable $x11 + PseudoRET implicit $x10 +... diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll new file mode 100644 index 0000000000000..b1250f4804549 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat-bf16.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZFBFMIN-ZVFBFMIN +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFBFMIN +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZFBFMIN-ZVFBFMIN +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFBFMIN + +define <8 x bfloat> @splat_v8bf16(ptr %x, bfloat %y) { +; ZFBFMIN-ZVFBFMIN-LABEL: splat_v8bf16: +; ZFBFMIN-ZVFBFMIN: # %bb.0: +; ZFBFMIN-ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vfmv.v.f v10, fa5 +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v10 +; ZFBFMIN-ZVFBFMIN-NEXT: ret +; +; ZVFBFMIN-LABEL: splat_v8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: ret + %a = insertelement <8 x bfloat> poison, bfloat %y, i32 0 + %b = shufflevector <8 x bfloat> %a, <8 x bfloat> poison, <8 x i32> zeroinitializer + ret <8 x bfloat> %b +} + +define <16 x bfloat> @splat_16bf16(ptr %x, bfloat %y) { +; ZFBFMIN-ZVFBFMIN-LABEL: splat_16bf16: +; ZFBFMIN-ZVFBFMIN: # %bb.0: +; ZFBFMIN-ZVFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vfmv.v.f v12, fa5 +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vfncvtbf16.f.f.w v8, v12 +; ZFBFMIN-ZVFBFMIN-NEXT: ret +; +; ZVFBFMIN-LABEL: splat_16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: ret + %a = insertelement <16 x bfloat> poison, bfloat %y, i32 0 + %b = shufflevector <16 x bfloat> %a, <16 x bfloat> poison, <16 x i32> zeroinitializer + ret <16 x bfloat> %b +} + +define <8 x bfloat> @splat_zero_v8bf16(ptr %x) { +; ZFBFMIN-ZVFBFMIN-LABEL: splat_zero_v8bf16: +; ZFBFMIN-ZVFBFMIN: # %bb.0: +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.i v8, 0 +; ZFBFMIN-ZVFBFMIN-NEXT: ret +; +; ZVFBFMIN-LABEL: splat_zero_v8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.i v8, 0 +; ZVFBFMIN-NEXT: ret + ret <8 x bfloat> splat (bfloat 0.0) +} + +define <16 x bfloat> @splat_zero_16bf16(ptr %x) { +; ZFBFMIN-ZVFBFMIN-LABEL: splat_zero_16bf16: +; 
ZFBFMIN-ZVFBFMIN: # %bb.0: +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.i v8, 0 +; ZFBFMIN-ZVFBFMIN-NEXT: ret +; +; ZVFBFMIN-LABEL: splat_zero_16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.i v8, 0 +; ZVFBFMIN-NEXT: ret + ret <16 x bfloat> splat (bfloat 0.0) +} + +define <8 x bfloat> @splat_negzero_v8bf16(ptr %x) { +; ZFBFMIN-ZVFBFMIN-LABEL: splat_negzero_v8bf16: +; ZFBFMIN-ZVFBFMIN: # %bb.0: +; ZFBFMIN-ZVFBFMIN-NEXT: lui a0, 1048568 +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZFBFMIN-ZVFBFMIN-NEXT: ret +; +; ZVFBFMIN-LABEL: splat_negzero_v8bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: lui a0, 1048568 +; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: ret + ret <8 x bfloat> splat (bfloat -0.0) +} + +define <16 x bfloat> @splat_negzero_16bf16(ptr %x) { +; ZFBFMIN-ZVFBFMIN-LABEL: splat_negzero_16bf16: +; ZFBFMIN-ZVFBFMIN: # %bb.0: +; ZFBFMIN-ZVFBFMIN-NEXT: lui a0, 1048568 +; ZFBFMIN-ZVFBFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZFBFMIN-ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZFBFMIN-ZVFBFMIN-NEXT: ret +; +; ZVFBFMIN-LABEL: splat_negzero_16bf16: +; ZVFBFMIN: # %bb.0: +; ZVFBFMIN-NEXT: lui a0, 1048568 +; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFBFMIN-NEXT: vmv.v.x v8, a0 +; ZVFBFMIN-NEXT: ret + ret <16 x bfloat> splat (bfloat -0.0) +} diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll index d26fd0ca26c72..3a439cdb996fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll @@ -159,9 +159,8 @@ define @vmerge_larger_vl_same_passthru( %pa define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v10, v11, v0.t -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vmv.v.v v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 39055dc5adfcf..6700920cebff0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1072,9 +1072,8 @@ define @vmerge_larger_vl_same_passthru( %pa define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/SPIRV/debug-info/basic-global-di.ll b/llvm/test/CodeGen/SPIRV/debug-info/basic-global-di.ll index 336b7db324c3d..139a8e11182bc 100644 --- a/llvm/test/CodeGen/SPIRV/debug-info/basic-global-di.ll +++ b/llvm/test/CodeGen/SPIRV/debug-info/basic-global-di.ll @@ -8,12 +8,12 @@ ; CHECK-MIR-DAG: [[dwarf_version:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 5 ; CHECK-MIR-DAG: [[source_language:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 3 ; CHECK-MIR-DAG: 
[[debug_info_version:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 21 -; CHECK-MIR-DAG: [[filename_str:%[0-9]+\:id\(s32\)]] = OpString 1094795567, 1094795585, 792805697, 1111638594, 1111638594, 1128481583, 1128481603, 1697596227, 1886216568, 1663985004, 0 +; CHECK-MIR-DAG: [[filename_str:%[0-9]+\:id\(s32\)]] = OpString 1094795567, 1094795585, 792805697, 1111638594, 1111638594, 1128481583, 1128481603, {{1697596227|1700545347}}, 1886216568, 1663985004, 0 ; CHECK-MIR-DAG: [[debug_source:%[0-9]+\:id\(s32\)]] = OpExtInst [[type_void]], 3, 35, [[filename_str]] ; CHECK-MIR-DAG: [[debug_compilation_unit:%[0-9]+\:id\(s32\)]] = OpExtInst [[type_void]], 3, 1, [[source_language]], [[dwarf_version]], [[debug_source]], [[debug_info_version]] ; CHECK-SPIRV: [[ext_inst_non_semantic:%[0-9]+]] = OpExtInstImport "NonSemantic.Shader.DebugInfo.100" -; CHECK-SPIRV: [[filename_str:%[0-9]+]] = OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC/example.c" +; CHECK-SPIRV: [[filename_str:%[0-9]+]] = OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC{{[/\\]}}example.c" ; CHECK-SPIRV-DAG: [[type_void:%[0-9]+]] = OpTypeVoid ; CHECK-SPIRV-DAG: [[type_i32:%[0-9]+]] = OpTypeInt 32 0 ; CHECK-SPIRV-DAG: [[dwarf_version:%[0-9]+]] = OpConstant [[type_i32]] 5 @@ -23,7 +23,7 @@ ; CHECK-SPIRV: [[debug_compiation_unit:%[0-9]+]] = OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugCompilationUnit [[source_language]] [[dwarf_version]] [[debug_source]] [[debug_info_version]] ; CHECK-OPTION-NOT: OpExtInstImport "NonSemantic.Shader.DebugInfo.100" -; CHECK-OPTION-NOT: OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC/example.c" +; CHECK-OPTION-NOT: OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC{{[/\\]}}example.c" define spir_func void @foo() { entry: diff --git a/llvm/test/CodeGen/SPIRV/opencl/vload_halfn.ll b/llvm/test/CodeGen/SPIRV/opencl/vload_halfn.ll new file mode 100644 index 0000000000000..abfae74afb659 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/vload_halfn.ll @@ -0,0 +1,15 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#IMPORT:]] = OpExtInstImport "OpenCL.std" + +; CHECK: %[[#FLOAT:]] = OpTypeFloat 32 +; CHECK: %[[#V2FLOAT:]] = OpTypeVector %[[#FLOAT]] 2 + +define void @test(i64 %a, ptr addrspace(1) %b) { +; CHECK: %[[#]] = OpExtInst %[[#V2FLOAT:]] %[[#IMPORT]] vload_halfn %[[#]] %[[#]] 2 + %c = call spir_func <2 x float> @_Z11vload_half2mPU3AS1KDh(i64 %a, ptr addrspace(1) %b) + ret void +} + +declare <2 x float> @_Z11vload_half2mPU3AS1KDh(i64, ptr addrspace(1)) diff --git a/llvm/test/CodeGen/X86/code-align-loops.ll b/llvm/test/CodeGen/X86/code-align-loops.ll index 3823293d747c7..616478993a51d 100644 --- a/llvm/test/CodeGen/X86/code-align-loops.ll +++ b/llvm/test/CodeGen/X86/code-align-loops.ll @@ -96,6 +96,56 @@ for.body5: ; preds = %for.body, %for.body br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2 } +; test3 is to check if .p2align can be correctly set on loops with multi latches. 
+; The test IR is generated from below simple C file: +; $ clang -O0 -S -emit-llvm loop.c +; $ cat loop.c +; int test3() { +; int i = 0; +; [[clang::code_align(32)]] +; while (i < 10) { +; if (i % 2) { +; continue; +; } +; i++; +; } +; } +; CHECK-LABEL: test3_multilatch: +; ALIGN: .p2align 4, 0x90 +; ALIGN-NEXT: .LBB2_1: # %while.cond +define dso_local i32 @test3_multilatch() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, ptr %retval, align 4 + store i32 0, ptr %i, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %if.then, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %while.body, label %while.end + +while.body: ; preds = %while.cond + %1 = load i32, ptr %i, align 4 + %rem = srem i32 %1, 2 + %tobool = icmp ne i32 %rem, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %while.body + br label %while.cond, !llvm.loop !0 + +if.end: ; preds = %while.body + %2 = load i32, ptr %i, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, ptr %i, align 4 + br label %while.cond, !llvm.loop !0 + +while.end: ; preds = %while.cond + %3 = load i32, ptr %retval, align 4 + ret i32 %3 +} + declare void @bar() declare void @var() diff --git a/llvm/test/CodeGen/Xtensa/load.ll b/llvm/test/CodeGen/Xtensa/load.ll new file mode 100644 index 0000000000000..2f730f56eb1f5 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/load.ll @@ -0,0 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=xtensa < %s | FileCheck %s + +define signext i8 @test_load_i8(ptr %p){ +; CHECK-LABEL: test_load_i8: +; CHECK: l8ui a8, a2, 0 +; CHECK-NEXT: slli a8, a8, 24 +; CHECK-NEXT: srai a2, a8, 24 +; CHECK-NEXT: ret + %1 = load i8, ptr %p, align 1 + ret i8 %1 +} diff --git a/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_nosanitize.ll b/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_nosanitize.ll new file mode 100644 index 0000000000000..5a465df749da6 --- /dev/null +++ b/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_nosanitize.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -passes=rtsan -S | FileCheck %s + +define void @nosanitized_function() #0 { + %1 = alloca ptr, align 8 + %2 = call ptr @malloc(i64 noundef 2) #3 + store ptr %2, ptr %1, align 8 + ret void +} + +declare ptr @malloc(i64 noundef) #1 + +define noundef i32 @main() #2 { + %1 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + call void @nosanitized_function() #4 + ret i32 0 +} + +attributes #0 = { nosanitize_realtime } + +; CHECK-LABEL: @nosanitized_function() +; CHECK-NEXT: call{{.*}}@__rtsan_off + +; CHECK: call{{.*}}@__rtsan_on +; CHECK-NEXT: ret{{.*}}void diff --git a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll new file mode 100644 index 0000000000000..63366ba998c7b --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll @@ -0,0 +1,231 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s + +; In the following tests, the call to @callee may invalidate ptr %test_c and so +; prohibit removing loads of %test_c following the call, preventing Argument +; Promotion of %test_c in the general case. + +; This is called by @caller_ptr_args, from which we cannot prove anything about +; whether %test_c may alias %p and so we cannot promote %test_c. 
+; +define internal i32 @test_cannot_promote_1(ptr %p, ptr nocapture readonly %test_c) { +; CHECK-LABEL: define {{[^@]+}}@test_cannot_promote_1 +; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { +; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) +; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-NEXT: ret i32 [[SUM]] +; + %res = call i32 @callee(ptr %p, ptr %test_c) + + %ltest_c = load i32, ptr %test_c + + %sum = add i32 %ltest_c, %res + + ret i32 %sum +} + +; This is called by @caller_aliased_args, from which we can see that %test_c may +; alias %p and so we cannot promote %test_c. +; +define internal i32 @test_cannot_promote_2(ptr %p, ptr nocapture readonly %test_c) { +; CHECK-LABEL: define {{[^@]+}}@test_cannot_promote_2 +; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { +; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) +; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-NEXT: ret i32 [[SUM]] +; + %res = call i32 @callee(ptr %p, ptr %test_c) + + %ltest_c = load i32, ptr %test_c + + %sum = add i32 %ltest_c, %res + + ret i32 %sum +} + +; This is called by @caller_safe_args_1, but also from @caller_aliased_args, so +; we cannot promote %test_c. +; +define internal i32 @test_cannot_promote_3(ptr %p, ptr nocapture readonly %test_c) { +; CHECK-LABEL: define {{[^@]+}}@test_cannot_promote_3 +; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { +; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) +; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-NEXT: ret i32 [[SUM]] +; + %res = call i32 @callee(ptr %p, ptr %test_c) + + %ltest_c = load i32, ptr %test_c + + %sum = add i32 %ltest_c, %res + + ret i32 %sum +} + +; FIXME: We should perform ArgPromotion here! +; +; This is called only by @caller_safe_args_1, from which we can prove that +; %test_c does not alias %p for any Call to the function, so we can promote it. +; +define internal i32 @test_can_promote_1(ptr %p, ptr nocapture readonly %test_c) { +; CHECK-LABEL: define {{[^@]+}}@test_can_promote_1 +; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { +; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) +; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-NEXT: ret i32 [[SUM]] +; + %res = call i32 @callee(ptr %p, ptr %test_c) + + %ltest_c = load i32, ptr %test_c + + %sum = add i32 %ltest_c, %res + + ret i32 %sum +} + +; FIXME: We should perform ArgPromotion here! +; +; This is called by multiple callers (@caller_safe_args_1, @caller_safe_args_2), +; from which we can prove that %test_c does not alias %p for any Call to the +; function, so we can promote it. 
+; +define internal i32 @test_can_promote_2(ptr %p, ptr nocapture readonly %test_c) { +; CHECK-LABEL: define {{[^@]+}}@test_can_promote_2 +; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { +; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) +; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-NEXT: ret i32 [[SUM]] +; + %res = call i32 @callee(ptr %p, ptr %test_c) + + %ltest_c = load i32, ptr %test_c + + %sum = add i32 %ltest_c, %res + + ret i32 %sum +} + +; Called by @test_XXX +define internal i32 @callee(ptr %p, ptr nocapture readonly %callee_c) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (ptr [[P:%.*]], i32 [[CALLEE_C_0_VAL:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[A]], [[CALLEE_C_0_VAL]] +; CHECK-NEXT: store i32 [[SUM]], ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[SUM]] +; + %a = load i32, ptr %p + + %lcallee_c = load i32, ptr %callee_c + + %sum = add i32 %a, %lcallee_c + + store i32 %sum, ptr %p + + ret i32 %sum +} + +; Calls @test_cannot_promote_1 +define i32 @caller_ptr_args(i64 %n, ptr %p1, ptr %p2) { +; CHECK-LABEL: define {{[^@]+}}@caller_ptr_args +; CHECK-SAME: (i64 [[N:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: call void @memset(ptr [[P1]], i64 0, i64 [[N]]) +; CHECK-NEXT: store i32 5, ptr [[P2]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @test_cannot_promote_1(ptr [[P1]], ptr [[P2]]) +; CHECK-NEXT: ret i32 [[RES]] +; + call void @memset(ptr %p1, i64 0, i64 %n) + + store i32 5, ptr %p2 + + %res = call i32 @test_cannot_promote_1(ptr %p1, ptr %p2) + + ret i32 %res +} + +; Calls @test_cannot_promote_2 +; Calls @test_cannot_promote_3 +define i32 @caller_aliased_args() { +; CHECK-LABEL: define {{[^@]+}}@caller_aliased_args() { +; CHECK-NEXT: [[CALLER_C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 5, ptr [[CALLER_C]], align 4 +; CHECK-NEXT: [[RES1:%.*]] = call i32 @test_cannot_promote_2(ptr [[CALLER_C]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[RES2:%.*]] = call i32 @test_cannot_promote_3(ptr [[CALLER_C]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES1]], [[RES2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %caller_c = alloca i32 + store i32 5, ptr %caller_c + + %res1 = call i32 @test_cannot_promote_2(ptr %caller_c, ptr %caller_c) + %res2 = call i32 @test_cannot_promote_3(ptr %caller_c, ptr %caller_c) + + %res = add i32 %res1, %res2 + + ret i32 %res +} + +; Calls @test_cannot_promote_3 +; Calls @test_can_promote_1 +; Calls @test_can_promote_2 +define i32 @caller_safe_args_1(i64 %n) { +; CHECK-LABEL: define {{[^@]+}}@caller_safe_args_1 +; CHECK-SAME: (i64 [[N:%.*]]) { +; CHECK-NEXT: [[P:%.*]] = alloca [5 x double], i64 [[N]], align 8 +; CHECK-NEXT: call void @memset(ptr [[P]], i64 0, i64 [[N]]) +; CHECK-NEXT: [[CALLER_C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 5, ptr [[CALLER_C]], align 4 +; CHECK-NEXT: [[RES1:%.*]] = call i32 @test_cannot_promote_3(ptr [[P]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[RES2:%.*]] = call i32 @test_can_promote_1(ptr [[P]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[RES3:%.*]] = call i32 @test_can_promote_2(ptr [[P]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[RES12:%.*]] = add i32 [[RES1]], [[RES2]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES12]], [[RES3]] +; CHECK-NEXT: ret i32 [[RES]] +; + %p = alloca [5 x double], i64 %n + call void @memset(ptr %p, i64 0, i64 %n) + + 
%caller_c = alloca i32 + store i32 5, ptr %caller_c + + %res1 = call i32 @test_cannot_promote_3(ptr %p, ptr %caller_c) + %res2 = call i32 @test_can_promote_1(ptr %p, ptr %caller_c) + %res3 = call i32 @test_can_promote_2(ptr %p, ptr %caller_c) + + %res12 = add i32 %res1, %res2 + %res = add i32 %res12, %res3 + + ret i32 %res +} + +; Calls @test_can_promote_2 +define i32 @caller_safe_args_2(i64 %n, ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@caller_safe_args_2 +; CHECK-SAME: (i64 [[N:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: call void @memset(ptr [[P]], i64 0, i64 [[N]]) +; CHECK-NEXT: [[CALLER_C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 5, ptr [[CALLER_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], ptr [[CALLER_C]]) +; CHECK-NEXT: ret i32 [[RES]] +; + call void @memset(ptr %p, i64 0, i64 %n) + + %caller_c = alloca i32 + store i32 5, ptr %caller_c + + %res = call i32 @test_can_promote_2(ptr %p, ptr %caller_c) + + ret i32 %res +} + +declare void @memset(ptr, i64, i64) diff --git a/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll new file mode 100644 index 0000000000000..4f5f936606ca3 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll @@ -0,0 +1,42 @@ +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata +; RUN: opt < %t/a.ll --passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata + +;--- a.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-redhat-linux-gnu" + +define void @_bar() presplitcoroutine personality ptr null { + %1 = call token @llvm.coro.save(ptr null) + %2 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %2, label %5 [ + i8 0, label %3 + i8 1, label %4 + ] + +3: ; preds = %0 + ret void + +4: ; preds = %0 + ret void + +5: ; preds = %0 + ret void +} + +declare token @llvm.coro.save(ptr) + +declare i8 @llvm.coro.suspend(token, i1) + +;--- a.proftext +# IR level Instrumentation Flag +:ir + +_bar +# Func Hash: +1063705160175073211 +# Num Counters: +2 +1 +0 diff --git a/llvm/test/Transforms/Inline/ML/lit.local.cfg b/llvm/test/Transforms/Inline/ML/lit.local.cfg deleted file mode 100644 index e8c7912650cb8..0000000000000 --- a/llvm/test/Transforms/Inline/ML/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -import sys - -config.unsupported = sys.version_info.minor <= 8 diff --git a/llvm/test/Transforms/InstCombine/memchr-7.ll b/llvm/test/Transforms/InstCombine/memchr-7.ll index 0b364cce656d7..61f1093279f83 100644 --- a/llvm/test/Transforms/InstCombine/memchr-7.ll +++ b/llvm/test/Transforms/InstCombine/memchr-7.ll @@ -12,11 +12,12 @@ declare ptr @memchr(ptr, i32, i64) define zeroext i1 @strchr_to_memchr_n_equals_len(i32 %c) { ; CHECK-LABEL: @strchr_to_memchr_n_equals_len( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]] -; CHECK-NEXT: ret i1 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], -97 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 26 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] ; %call = tail call ptr @strchr(ptr nonnull dereferenceable(27) @.str, i32 %c) %cmp = icmp ne ptr %call, 
null @@ -38,9 +39,10 @@ define zeroext i1 @memchr_n_equals_len(i32 %c) { define zeroext i1 @memchr_n_less_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_less_than_len( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 15 -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 15 +; CHECK-NEXT: ret i1 [[TMP3]] ; %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 15) %cmp = icmp ne ptr %call, null @@ -50,11 +52,12 @@ define zeroext i1 @memchr_n_less_than_len(i32 %c) { define zeroext i1 @memchr_n_more_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_more_than_len( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]] -; CHECK-NEXT: ret i1 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP1]], -97 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 26 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] ; %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 30) %cmp = icmp ne ptr %call, null @@ -114,12 +117,13 @@ define zeroext i1 @memchr_n_equals_len2_minsize(i32 %c) minsize { ; Positive test - 2 non-contiguous ranges define zeroext i1 @strchr_to_memchr_2_non_cont_ranges(i32 %c) { ; CHECK-LABEL: @strchr_to_memchr_2_non_cont_ranges( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 6 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[C]], -109 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 3 -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP1]], -109 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] +; CHECK-NEXT: ret i1 [[TMP6]] ; %call = tail call ptr @memchr(ptr @.str.2, i32 %c, i64 9) %cmp = icmp ne ptr %call, null @@ -129,12 +133,13 @@ define zeroext i1 @strchr_to_memchr_2_non_cont_ranges(i32 %c) { ; Positive test - 2 non-contiguous ranges with char duplication define zeroext i1 @strchr_to_memchr_2_non_cont_ranges_char_dup(i32 %c) { ; CHECK-LABEL: @strchr_to_memchr_2_non_cont_ranges_char_dup( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[C]], -109 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP1]], -109 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] +; CHECK-NEXT: ret i1 [[TMP6]] ; %call = tail call ptr @memchr(ptr @.str.4, i32 %c, i64 6) %cmp = icmp ne ptr %call, null diff --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll index 
e0f12fb3b0279..336e462a4cf63 100644 --- a/llvm/test/Transforms/LoopVectorize/global_alias.ll +++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll @@ -503,7 +503,7 @@ for.end: ; preds = %for.body ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @mayAlias01( -; CHECK: add nsw <4 x i32> +; CHECK-NOT: add nsw <4 x i32> ; CHECK: ret define i32 @mayAlias01(i32 %a) nounwind { @@ -536,7 +536,7 @@ for.end: ; preds = %for.body ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @mayAlias02( -; CHECK: add nsw <4 x i32> +; CHECK-NOT: add nsw <4 x i32> ; CHECK: ret define i32 @mayAlias02(i32 %a) nounwind { diff --git a/llvm/test/Transforms/SCCP/pointer-nonnull.ll b/llvm/test/Transforms/SCCP/pointer-nonnull.ll new file mode 100644 index 0000000000000..cd04c1c2d39d9 --- /dev/null +++ b/llvm/test/Transforms/SCCP/pointer-nonnull.ll @@ -0,0 +1,206 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=sccp < %s | FileCheck %s + +declare ptr @get() + +define i1 @test_no_attr(ptr %p) { +; CHECK-LABEL: define i1 @test_no_attr( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_nonnull(ptr nonnull %p) { +; CHECK-LABEL: define i1 @test_nonnull( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_nonnull_eq(ptr nonnull %p) { +; CHECK-LABEL: define i1 @test_nonnull_eq( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %cmp = icmp eq ptr %p, null + ret i1 %cmp +} + +define i1 @test_dereferenceable(ptr dereferenceable(4) %p) { +; CHECK-LABEL: define i1 @test_dereferenceable( +; CHECK-SAME: ptr dereferenceable(4) [[P:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_alloca() { +; CHECK-LABEL: define i1 @test_alloca() { +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: ret i1 true +; + %a = alloca i32 + %cmp = icmp ne ptr %a, null + ret i1 %cmp +} + +define i1 @test_alloca_addrspace() { +; CHECK-LABEL: define i1 @test_alloca_addrspace() { +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(1) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr addrspace(1) [[A]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = alloca i32, addrspace(1) + %cmp = icmp ne ptr addrspace(1) %a, null + ret i1 %cmp +} + +define i1 @test_alloca_null_pointer_valid() null_pointer_is_valid { +; CHECK-LABEL: define i1 @test_alloca_null_pointer_valid( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[A]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %a = alloca i32 + %cmp = icmp ne ptr %a, null + ret i1 %cmp +} + +define i1 @test_load_nonnull(ptr %p) { +; CHECK-LABEL: define i1 @test_load_nonnull( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0:![0-9]+]] +; CHECK-NEXT: ret i1 true +; + %p2 = load ptr, ptr %p, !nonnull !{} + %cmp = icmp ne ptr %p2, null + ret i1 %cmp +} + +define i1 @test_call_nonnull() { +; CHECK-LABEL: define i1 @test_call_nonnull() { +; CHECK-NEXT: [[P:%.*]] = call nonnull ptr @get() +; CHECK-NEXT: ret i1 true +; + %p = call nonnull ptr @get() + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_call_dereferenceable() { +; CHECK-LABEL: define i1 @test_call_dereferenceable() { +; CHECK-NEXT: [[P:%.*]] = call dereferenceable(4) 
ptr @get() +; CHECK-NEXT: ret i1 true +; + %p = call dereferenceable(4) ptr @get() + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_gep_no_flags(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_no_flags( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_nuw(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_nuw( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: ret i1 true +; + %gep = getelementptr nuw i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_inbounds(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_inbounds( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: ret i1 true +; + %gep = getelementptr inbounds i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_inbounds_null_pointer_valid(ptr nonnull %p, i64 %x) null_pointer_is_valid { +; CHECK-LABEL: define i1 @test_gep_inbounds_null_pointer_valid( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr inbounds i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_select(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_select( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr [[P]], ptr [[GEP]] +; CHECK-NEXT: ret i1 true +; + %gep = getelementptr nuw i8, ptr %p, i64 %x + %sel = select i1 %c, ptr %p, ptr %gep + %cmp = icmp ne ptr %sel, null + ret i1 %cmp +} + +define i1 @test_select_not_nuw(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_select_not_nuw( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr [[P]], ptr [[GEP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[SEL]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr i8, ptr %p, i64 %x + %sel = select i1 %c, ptr %p, ptr %gep + %cmp = icmp ne ptr %sel, null + ret i1 %cmp +} + +define i1 @test_phi(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_phi( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[JOIN:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: br label %[[JOIN]] +; CHECK: [[JOIN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[P]], %[[ENTRY]] ], [ [[GEP]], %[[IF]] ] +; CHECK-NEXT: ret i1 true +; +entry: + br i1 %c, label %if, label %join + +if: + %gep = getelementptr nuw i8, ptr %p, i64 %x + br label %join + +join: + %phi = phi ptr [ %p, %entry ], [ %gep, %if ] + %cmp = icmp ne ptr %phi, null + ret i1 %cmp +} +;. +; CHECK: [[META0]] = !{} +;. 
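; Illustrative sketch (not part of this patch): the SCCP non-null sources tested
; above compose. Assuming the same `opt -S -passes=sccp` invocation as the RUN line
; of pointer-nonnull.ll, a dereferenceable argument combined with an inbounds GEP is
; expected to fold the compare to `true` (absent null_pointer_is_valid). The function
; name below is hypothetical and chosen only for this example.
;
; define i1 @nonnull_deref_then_inbounds(ptr dereferenceable(4) %p, i64 %x) {
;   %gep = getelementptr inbounds i8, ptr %p, i64 %x   ; inbounds offset of a nonnull base stays nonnull
;   %cmp = icmp ne ptr %gep, null                      ; expected to constant-fold to true by SCCP
;   ret i1 %cmp
; }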
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/long-mask-split.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/long-mask-split.ll new file mode 100644 index 0000000000000..cb07090c601f6 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/long-mask-split.ll @@ -0,0 +1,438 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s + +define i32 @test() { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr null, i64 16), align 8 +; CHECK-NEXT: [[SHR_1_I:%.*]] = lshr i64 0, 0 +; CHECK-NEXT: [[SHR_1_I_13:%.*]] = lshr i64 0, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[SHR_1_I_13]] to i8 +; CHECK-NEXT: [[STOREDV_1_I_13:%.*]] = and i8 0, [[TMP1]] +; CHECK-NEXT: [[SHR_1_I_14:%.*]] = lshr i64 0, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SHR_1_I_14]] to i8 +; CHECK-NEXT: [[STOREDV_1_I_14:%.*]] = and i8 [[STOREDV_1_I_13]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr (i8, ptr null, i64 32), align 8 +; CHECK-NEXT: [[SHR_2_I:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[SHR_2_I]] to i8 +; CHECK-NEXT: [[STOREDV_2_I:%.*]] = and i8 [[STOREDV_1_I_14]], [[TMP4]] +; CHECK-NEXT: [[SHR_2_I_1:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[SHR_2_I_1]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_1:%.*]] = and i8 [[STOREDV_2_I]], [[TMP5]] +; CHECK-NEXT: [[SHR_2_I_2:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SHR_2_I_2]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_2:%.*]] = and i8 [[STOREDV_2_I_1]], [[TMP6]] +; CHECK-NEXT: [[SHR_2_I_3:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[SHR_2_I_3]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_3:%.*]] = and i8 [[STOREDV_2_I_2]], [[TMP7]] +; CHECK-NEXT: [[SHR_2_I_4:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[SHR_2_I_4]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_4:%.*]] = and i8 [[STOREDV_2_I_3]], [[TMP8]] +; CHECK-NEXT: [[SHR_2_I_5:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[SHR_2_I_5]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_5:%.*]] = and i8 [[STOREDV_2_I_4]], [[TMP9]] +; CHECK-NEXT: [[SHR_2_I_6:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[SHR_2_I_6]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_6:%.*]] = and i8 [[STOREDV_2_I_5]], [[TMP10]] +; CHECK-NEXT: [[SHR_2_I_7:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[SHR_2_I_7]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_7:%.*]] = and i8 [[STOREDV_2_I_6]], [[TMP11]] +; CHECK-NEXT: [[STOREDV_2_I_8:%.*]] = and i8 [[STOREDV_2_I_7]], 0 +; CHECK-NEXT: [[SHR_2_I_9:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[SHR_2_I_9]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_9:%.*]] = and i8 [[STOREDV_2_I_8]], [[TMP12]] +; CHECK-NEXT: [[SHR_2_I_10:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[SHR_2_I_10]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_10:%.*]] = and i8 [[STOREDV_2_I_9]], [[TMP13]] +; CHECK-NEXT: [[SHR_2_I_11:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[SHR_2_I_11]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_11:%.*]] = and i8 [[STOREDV_2_I_10]], [[TMP14]] +; CHECK-NEXT: [[SHR_2_I_12:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[SHR_2_I_12]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_12:%.*]] = and i8 [[STOREDV_2_I_11]], [[TMP15]] +; 
CHECK-NEXT: [[SHR_2_I_13:%.*]] = lshr i64 0, [[TMP3]] +; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[SHR_2_I_13]] to i8 +; CHECK-NEXT: [[STOREDV_2_I_13:%.*]] = and i8 [[STOREDV_2_I_12]], [[TMP16]] +; CHECK-NEXT: [[STOREDV_2_I_14:%.*]] = and i8 [[STOREDV_2_I_13]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr getelementptr (i8, ptr null, i64 48), align 8 +; CHECK-NEXT: [[SHR_3_I:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[SHR_3_I]] to i8 +; CHECK-NEXT: [[STOREDV_3_I:%.*]] = and i8 [[STOREDV_2_I_14]], [[TMP18]] +; CHECK-NEXT: [[SHR_3_I_1:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[SHR_3_I_1]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_1:%.*]] = and i8 [[STOREDV_3_I]], [[TMP19]] +; CHECK-NEXT: [[STOREDV_3_I_2:%.*]] = and i8 [[STOREDV_3_I_1]], 0 +; CHECK-NEXT: [[SHR_3_I_3:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[SHR_3_I_3]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_3:%.*]] = and i8 [[STOREDV_3_I_2]], [[TMP20]] +; CHECK-NEXT: [[SHR_3_I_4:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[SHR_3_I_4]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_4:%.*]] = and i8 [[STOREDV_3_I_3]], [[TMP21]] +; CHECK-NEXT: [[SHR_3_I_5:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[SHR_3_I_5]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_5:%.*]] = and i8 [[STOREDV_3_I_4]], [[TMP22]] +; CHECK-NEXT: [[STOREDV_3_I_6:%.*]] = and i8 [[STOREDV_3_I_5]], 0 +; CHECK-NEXT: [[SHR_3_I_7:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP23:%.*]] = trunc i64 [[SHR_3_I_7]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_7:%.*]] = and i8 [[STOREDV_3_I_6]], [[TMP23]] +; CHECK-NEXT: [[SHR_3_I_8:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[SHR_3_I_8]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_8:%.*]] = and i8 [[STOREDV_3_I_7]], [[TMP24]] +; CHECK-NEXT: [[SHR_3_I_9:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[SHR_3_I_9]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_9:%.*]] = and i8 [[STOREDV_3_I_8]], [[TMP25]] +; CHECK-NEXT: [[SHR_3_I_10:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[SHR_3_I_10]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_10:%.*]] = and i8 [[STOREDV_3_I_9]], [[TMP26]] +; CHECK-NEXT: [[SHR_3_I_11:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[SHR_3_I_11]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_11:%.*]] = and i8 [[STOREDV_3_I_10]], [[TMP27]] +; CHECK-NEXT: [[STOREDV_3_I_12:%.*]] = and i8 [[STOREDV_3_I_11]], 0 +; CHECK-NEXT: [[SHR_3_I_13:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[SHR_3_I_13]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_13:%.*]] = and i8 [[STOREDV_3_I_12]], [[TMP28]] +; CHECK-NEXT: [[SHR_3_I_14:%.*]] = lshr i64 0, [[TMP17]] +; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[SHR_3_I_14]] to i8 +; CHECK-NEXT: [[STOREDV_3_I_14:%.*]] = and i8 [[STOREDV_3_I_13]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr null, align 8 +; CHECK-NEXT: [[STOREDV_4_I:%.*]] = and i8 [[STOREDV_3_I_14]], 0 +; CHECK-NEXT: [[SHR_4_I_1:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[SHR_4_I_1]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_1:%.*]] = and i8 [[STOREDV_4_I]], [[TMP31]] +; CHECK-NEXT: [[SHR_4_I_2:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[SHR_4_I_2]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_2:%.*]] = and i8 [[STOREDV_4_I_1]], [[TMP32]] +; CHECK-NEXT: [[SHR_4_I_3:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[SHR_4_I_3]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_3:%.*]] = and i8 
[[STOREDV_4_I_2]], [[TMP33]] +; CHECK-NEXT: [[SHR_4_I_4:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[SHR_4_I_4]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_4:%.*]] = and i8 [[STOREDV_4_I_3]], [[TMP34]] +; CHECK-NEXT: [[STOREDV_4_I_5:%.*]] = and i8 [[STOREDV_4_I_4]], 0 +; CHECK-NEXT: [[SHR_4_I_6:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[SHR_4_I_6]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_6:%.*]] = and i8 [[STOREDV_4_I_5]], [[TMP35]] +; CHECK-NEXT: [[SHR_4_I_7:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[SHR_4_I_7]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_7:%.*]] = and i8 [[STOREDV_4_I_6]], [[TMP36]] +; CHECK-NEXT: [[SHR_4_I_8:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP37:%.*]] = trunc i64 [[SHR_4_I_8]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_8:%.*]] = and i8 [[STOREDV_4_I_7]], [[TMP37]] +; CHECK-NEXT: [[SHR_4_I_9:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP38:%.*]] = trunc i64 [[SHR_4_I_9]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_9:%.*]] = and i8 [[STOREDV_4_I_8]], [[TMP38]] +; CHECK-NEXT: [[SHR_4_I_10:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP39:%.*]] = trunc i64 [[SHR_4_I_10]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_10:%.*]] = and i8 [[STOREDV_4_I_9]], [[TMP39]] +; CHECK-NEXT: [[STOREDV_4_I_11:%.*]] = and i8 [[STOREDV_4_I_10]], 0 +; CHECK-NEXT: [[SHR_4_I_12:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP40:%.*]] = trunc i64 [[SHR_4_I_12]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_12:%.*]] = and i8 [[STOREDV_4_I_11]], [[TMP40]] +; CHECK-NEXT: [[SHR_4_I_13:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[SHR_4_I_13]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_13:%.*]] = and i8 [[STOREDV_4_I_12]], [[TMP41]] +; CHECK-NEXT: [[SHR_4_I_14:%.*]] = lshr i64 0, [[TMP30]] +; CHECK-NEXT: [[TMP42:%.*]] = trunc i64 [[SHR_4_I_14]] to i8 +; CHECK-NEXT: [[STOREDV_4_I_14:%.*]] = and i8 [[STOREDV_4_I_13]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr getelementptr (i8, ptr null, i64 80), align 8 +; CHECK-NEXT: [[SHR_5_I:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = trunc i64 [[SHR_5_I]] to i8 +; CHECK-NEXT: [[STOREDV_5_I:%.*]] = and i8 [[STOREDV_4_I_14]], [[TMP44]] +; CHECK-NEXT: [[SHR_5_I_1:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i64 [[SHR_5_I_1]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_1:%.*]] = and i8 [[STOREDV_5_I]], [[TMP45]] +; CHECK-NEXT: [[SHR_5_I_2:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = trunc i64 [[SHR_5_I_2]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_2:%.*]] = and i8 [[STOREDV_5_I_1]], [[TMP46]] +; CHECK-NEXT: [[STOREDV_5_I_3:%.*]] = and i8 [[STOREDV_5_I_2]], 0 +; CHECK-NEXT: [[SHR_5_I_4:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP47:%.*]] = trunc i64 [[SHR_5_I_4]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_4:%.*]] = and i8 [[STOREDV_5_I_3]], [[TMP47]] +; CHECK-NEXT: [[SHR_5_I_5:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP48:%.*]] = trunc i64 [[SHR_5_I_5]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_5:%.*]] = and i8 [[STOREDV_5_I_4]], [[TMP48]] +; CHECK-NEXT: [[SHR_5_I_6:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP49:%.*]] = trunc i64 [[SHR_5_I_6]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_6:%.*]] = and i8 [[STOREDV_5_I_5]], [[TMP49]] +; CHECK-NEXT: [[SHR_5_I_7:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[SHR_5_I_7]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_7:%.*]] = and i8 [[STOREDV_5_I_6]], [[TMP50]] +; CHECK-NEXT: [[SHR_5_I_8:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP51:%.*]] = trunc i64 [[SHR_5_I_8]] to i8 +; CHECK-NEXT: 
[[STOREDV_5_I_8:%.*]] = and i8 [[STOREDV_5_I_7]], [[TMP51]] +; CHECK-NEXT: [[STOREDV_5_I_9:%.*]] = and i8 [[STOREDV_5_I_8]], 0 +; CHECK-NEXT: [[SHR_5_I_10:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[SHR_5_I_10]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_10:%.*]] = and i8 [[STOREDV_5_I_9]], [[TMP52]] +; CHECK-NEXT: [[SHR_5_I_11:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[SHR_5_I_11]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_11:%.*]] = and i8 [[STOREDV_5_I_10]], [[TMP53]] +; CHECK-NEXT: [[SHR_5_I_12:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP54:%.*]] = trunc i64 [[SHR_5_I_12]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_12:%.*]] = and i8 [[STOREDV_5_I_11]], [[TMP54]] +; CHECK-NEXT: [[SHR_5_I_13:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[SHR_5_I_13]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_13:%.*]] = and i8 [[STOREDV_5_I_12]], [[TMP55]] +; CHECK-NEXT: [[SHR_5_I_14:%.*]] = lshr i64 0, [[TMP43]] +; CHECK-NEXT: [[TMP56:%.*]] = trunc i64 [[SHR_5_I_14]] to i8 +; CHECK-NEXT: [[STOREDV_5_I_14:%.*]] = and i8 [[STOREDV_5_I_13]], [[TMP56]] +; CHECK-NEXT: [[TMP57:%.*]] = load i64, ptr null, align 8 +; CHECK-NEXT: [[SHR_6_I:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP58:%.*]] = trunc i64 [[SHR_6_I]] to i8 +; CHECK-NEXT: [[STOREDV_6_I:%.*]] = and i8 [[STOREDV_5_I_14]], [[TMP58]] +; CHECK-NEXT: [[SHR_6_I_1:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[SHR_6_I_1]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_1:%.*]] = and i8 [[STOREDV_6_I]], [[TMP59]] +; CHECK-NEXT: [[SHR_6_I_2:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP60:%.*]] = trunc i64 [[SHR_6_I_2]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_2:%.*]] = and i8 [[STOREDV_6_I_1]], [[TMP60]] +; CHECK-NEXT: [[STOREDV_6_I_3:%.*]] = and i8 [[STOREDV_6_I_2]], 0 +; CHECK-NEXT: [[SHR_6_I_4:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[SHR_6_I_4]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_4:%.*]] = and i8 [[STOREDV_6_I_3]], [[TMP61]] +; CHECK-NEXT: [[SHR_6_I_5:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[SHR_6_I_5]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_5:%.*]] = and i8 [[STOREDV_6_I_4]], [[TMP62]] +; CHECK-NEXT: [[SHR_6_I_6:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP63:%.*]] = trunc i64 [[SHR_6_I_6]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_6:%.*]] = and i8 [[STOREDV_6_I_5]], [[TMP63]] +; CHECK-NEXT: [[SHR_6_I_7:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[SHR_6_I_7]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_7:%.*]] = and i8 [[STOREDV_6_I_6]], [[TMP64]] +; CHECK-NEXT: [[STOREDV_6_I_8:%.*]] = and i8 [[STOREDV_6_I_7]], 0 +; CHECK-NEXT: [[SHR_6_I_9:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP65:%.*]] = trunc i64 [[SHR_6_I_9]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_9:%.*]] = and i8 [[STOREDV_6_I_8]], [[TMP65]] +; CHECK-NEXT: [[SHR_6_I_10:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[SHR_6_I_10]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_10:%.*]] = and i8 [[STOREDV_6_I_9]], [[TMP66]] +; CHECK-NEXT: [[SHR_6_I_11:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[SHR_6_I_11]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_11:%.*]] = and i8 [[STOREDV_6_I_10]], [[TMP67]] +; CHECK-NEXT: [[SHR_6_I_12:%.*]] = lshr i64 0, [[TMP57]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[SHR_6_I_12]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_12:%.*]] = and i8 [[STOREDV_6_I_11]], [[TMP68]] +; CHECK-NEXT: [[STOREDV_6_I_13:%.*]] = and i8 [[STOREDV_6_I_12]], 0 +; CHECK-NEXT: [[SHR_6_I_14:%.*]] = lshr i64 0, 0 +; 
CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[SHR_6_I_14]] to i8 +; CHECK-NEXT: [[STOREDV_6_I_14:%.*]] = and i8 [[STOREDV_6_I_13]], [[TMP69]] +; CHECK-NEXT: store i8 [[STOREDV_6_I_14]], ptr null, align 1 +; CHECK-NEXT: ret i32 0 +; +entry: + %0 = load i64, ptr getelementptr (i8, ptr null, i64 16), align 8 + %shr.1.i = lshr i64 0, 0 + %shr.1.i.13 = lshr i64 0, %0 + %1 = trunc i64 %shr.1.i.13 to i8 + %storedv.1.i.13 = and i8 0, %1 + %shr.1.i.14 = lshr i64 0, %0 + %2 = trunc i64 %shr.1.i.14 to i8 + %storedv.1.i.14 = and i8 %storedv.1.i.13, %2 + %3 = load i64, ptr getelementptr (i8, ptr null, i64 32), align 8 + %shr.2.i = lshr i64 0, %3 + %4 = trunc i64 %shr.2.i to i8 + %storedv.2.i = and i8 %storedv.1.i.14, %4 + %shr.2.i.1 = lshr i64 0, %3 + %5 = trunc i64 %shr.2.i.1 to i8 + %storedv.2.i.1 = and i8 %storedv.2.i, %5 + %shr.2.i.2 = lshr i64 0, %3 + %6 = trunc i64 %shr.2.i.2 to i8 + %storedv.2.i.2 = and i8 %storedv.2.i.1, %6 + %shr.2.i.3 = lshr i64 0, %3 + %7 = trunc i64 %shr.2.i.3 to i8 + %storedv.2.i.3 = and i8 %storedv.2.i.2, %7 + %shr.2.i.4 = lshr i64 0, %3 + %8 = trunc i64 %shr.2.i.4 to i8 + %storedv.2.i.4 = and i8 %storedv.2.i.3, %8 + %shr.2.i.5 = lshr i64 0, %3 + %9 = trunc i64 %shr.2.i.5 to i8 + %storedv.2.i.5 = and i8 %storedv.2.i.4, %9 + %shr.2.i.6 = lshr i64 0, %3 + %10 = trunc i64 %shr.2.i.6 to i8 + %storedv.2.i.6 = and i8 %storedv.2.i.5, %10 + %shr.2.i.7 = lshr i64 0, %3 + %11 = trunc i64 %shr.2.i.7 to i8 + %storedv.2.i.7 = and i8 %storedv.2.i.6, %11 + %storedv.2.i.8 = and i8 %storedv.2.i.7, 0 + %shr.2.i.9 = lshr i64 0, %3 + %12 = trunc i64 %shr.2.i.9 to i8 + %storedv.2.i.9 = and i8 %storedv.2.i.8, %12 + %shr.2.i.10 = lshr i64 0, %3 + %13 = trunc i64 %shr.2.i.10 to i8 + %storedv.2.i.10 = and i8 %storedv.2.i.9, %13 + %shr.2.i.11 = lshr i64 0, %3 + %14 = trunc i64 %shr.2.i.11 to i8 + %storedv.2.i.11 = and i8 %storedv.2.i.10, %14 + %shr.2.i.12 = lshr i64 0, %3 + %15 = trunc i64 %shr.2.i.12 to i8 + %storedv.2.i.12 = and i8 %storedv.2.i.11, %15 + %shr.2.i.13 = lshr i64 0, %3 + %16 = trunc i64 %shr.2.i.13 to i8 + %storedv.2.i.13 = and i8 %storedv.2.i.12, %16 + %storedv.2.i.14 = and i8 %storedv.2.i.13, 0 + %17 = load i64, ptr getelementptr (i8, ptr null, i64 48), align 8 + %shr.3.i = lshr i64 0, %17 + %18 = trunc i64 %shr.3.i to i8 + %storedv.3.i = and i8 %storedv.2.i.14, %18 + %shr.3.i.1 = lshr i64 0, %17 + %19 = trunc i64 %shr.3.i.1 to i8 + %storedv.3.i.1 = and i8 %storedv.3.i, %19 + %storedv.3.i.2 = and i8 %storedv.3.i.1, 0 + %shr.3.i.3 = lshr i64 0, %17 + %20 = trunc i64 %shr.3.i.3 to i8 + %storedv.3.i.3 = and i8 %storedv.3.i.2, %20 + %shr.3.i.4 = lshr i64 0, %17 + %21 = trunc i64 %shr.3.i.4 to i8 + %storedv.3.i.4 = and i8 %storedv.3.i.3, %21 + %shr.3.i.5 = lshr i64 0, %17 + %22 = trunc i64 %shr.3.i.5 to i8 + %storedv.3.i.5 = and i8 %storedv.3.i.4, %22 + %storedv.3.i.6 = and i8 %storedv.3.i.5, 0 + %shr.3.i.7 = lshr i64 0, %17 + %23 = trunc i64 %shr.3.i.7 to i8 + %storedv.3.i.7 = and i8 %storedv.3.i.6, %23 + %shr.3.i.8 = lshr i64 0, %17 + %24 = trunc i64 %shr.3.i.8 to i8 + %storedv.3.i.8 = and i8 %storedv.3.i.7, %24 + %shr.3.i.9 = lshr i64 0, %17 + %25 = trunc i64 %shr.3.i.9 to i8 + %storedv.3.i.9 = and i8 %storedv.3.i.8, %25 + %shr.3.i.10 = lshr i64 0, %17 + %26 = trunc i64 %shr.3.i.10 to i8 + %storedv.3.i.10 = and i8 %storedv.3.i.9, %26 + %shr.3.i.11 = lshr i64 0, %17 + %27 = trunc i64 %shr.3.i.11 to i8 + %storedv.3.i.11 = and i8 %storedv.3.i.10, %27 + %storedv.3.i.12 = and i8 %storedv.3.i.11, 0 + %shr.3.i.13 = lshr i64 0, %17 + %28 = trunc i64 %shr.3.i.13 to i8 + %storedv.3.i.13 = and i8 
%storedv.3.i.12, %28 + %shr.3.i.14 = lshr i64 0, %17 + %29 = trunc i64 %shr.3.i.14 to i8 + %storedv.3.i.14 = and i8 %storedv.3.i.13, %29 + %30 = load i64, ptr null, align 8 + %storedv.4.i = and i8 %storedv.3.i.14, 0 + %shr.4.i.1 = lshr i64 0, %30 + %31 = trunc i64 %shr.4.i.1 to i8 + %storedv.4.i.1 = and i8 %storedv.4.i, %31 + %shr.4.i.2 = lshr i64 0, %30 + %32 = trunc i64 %shr.4.i.2 to i8 + %storedv.4.i.2 = and i8 %storedv.4.i.1, %32 + %shr.4.i.3 = lshr i64 0, %30 + %33 = trunc i64 %shr.4.i.3 to i8 + %storedv.4.i.3 = and i8 %storedv.4.i.2, %33 + %shr.4.i.4 = lshr i64 0, %30 + %34 = trunc i64 %shr.4.i.4 to i8 + %storedv.4.i.4 = and i8 %storedv.4.i.3, %34 + %storedv.4.i.5 = and i8 %storedv.4.i.4, 0 + %shr.4.i.6 = lshr i64 0, %30 + %35 = trunc i64 %shr.4.i.6 to i8 + %storedv.4.i.6 = and i8 %storedv.4.i.5, %35 + %shr.4.i.7 = lshr i64 0, %30 + %36 = trunc i64 %shr.4.i.7 to i8 + %storedv.4.i.7 = and i8 %storedv.4.i.6, %36 + %shr.4.i.8 = lshr i64 0, %30 + %37 = trunc i64 %shr.4.i.8 to i8 + %storedv.4.i.8 = and i8 %storedv.4.i.7, %37 + %shr.4.i.9 = lshr i64 0, %30 + %38 = trunc i64 %shr.4.i.9 to i8 + %storedv.4.i.9 = and i8 %storedv.4.i.8, %38 + %shr.4.i.10 = lshr i64 0, %30 + %39 = trunc i64 %shr.4.i.10 to i8 + %storedv.4.i.10 = and i8 %storedv.4.i.9, %39 + %storedv.4.i.11 = and i8 %storedv.4.i.10, 0 + %shr.4.i.12 = lshr i64 0, %30 + %40 = trunc i64 %shr.4.i.12 to i8 + %storedv.4.i.12 = and i8 %storedv.4.i.11, %40 + %shr.4.i.13 = lshr i64 0, %30 + %41 = trunc i64 %shr.4.i.13 to i8 + %storedv.4.i.13 = and i8 %storedv.4.i.12, %41 + %shr.4.i.14 = lshr i64 0, %30 + %42 = trunc i64 %shr.4.i.14 to i8 + %storedv.4.i.14 = and i8 %storedv.4.i.13, %42 + %43 = load i64, ptr getelementptr (i8, ptr null, i64 80), align 8 + %shr.5.i = lshr i64 0, %43 + %44 = trunc i64 %shr.5.i to i8 + %storedv.5.i = and i8 %storedv.4.i.14, %44 + %shr.5.i.1 = lshr i64 0, %43 + %45 = trunc i64 %shr.5.i.1 to i8 + %storedv.5.i.1 = and i8 %storedv.5.i, %45 + %shr.5.i.2 = lshr i64 0, %43 + %46 = trunc i64 %shr.5.i.2 to i8 + %storedv.5.i.2 = and i8 %storedv.5.i.1, %46 + %storedv.5.i.3 = and i8 %storedv.5.i.2, 0 + %shr.5.i.4 = lshr i64 0, %43 + %47 = trunc i64 %shr.5.i.4 to i8 + %storedv.5.i.4 = and i8 %storedv.5.i.3, %47 + %shr.5.i.5 = lshr i64 0, %43 + %48 = trunc i64 %shr.5.i.5 to i8 + %storedv.5.i.5 = and i8 %storedv.5.i.4, %48 + %shr.5.i.6 = lshr i64 0, %43 + %49 = trunc i64 %shr.5.i.6 to i8 + %storedv.5.i.6 = and i8 %storedv.5.i.5, %49 + %shr.5.i.7 = lshr i64 0, %43 + %50 = trunc i64 %shr.5.i.7 to i8 + %storedv.5.i.7 = and i8 %storedv.5.i.6, %50 + %shr.5.i.8 = lshr i64 0, %43 + %51 = trunc i64 %shr.5.i.8 to i8 + %storedv.5.i.8 = and i8 %storedv.5.i.7, %51 + %storedv.5.i.9 = and i8 %storedv.5.i.8, 0 + %shr.5.i.10 = lshr i64 0, %43 + %52 = trunc i64 %shr.5.i.10 to i8 + %storedv.5.i.10 = and i8 %storedv.5.i.9, %52 + %shr.5.i.11 = lshr i64 0, %43 + %53 = trunc i64 %shr.5.i.11 to i8 + %storedv.5.i.11 = and i8 %storedv.5.i.10, %53 + %shr.5.i.12 = lshr i64 0, %43 + %54 = trunc i64 %shr.5.i.12 to i8 + %storedv.5.i.12 = and i8 %storedv.5.i.11, %54 + %shr.5.i.13 = lshr i64 0, %43 + %55 = trunc i64 %shr.5.i.13 to i8 + %storedv.5.i.13 = and i8 %storedv.5.i.12, %55 + %shr.5.i.14 = lshr i64 0, %43 + %56 = trunc i64 %shr.5.i.14 to i8 + %storedv.5.i.14 = and i8 %storedv.5.i.13, %56 + %57 = load i64, ptr null, align 8 + %shr.6.i = lshr i64 0, %57 + %58 = trunc i64 %shr.6.i to i8 + %storedv.6.i = and i8 %storedv.5.i.14, %58 + %shr.6.i.1 = lshr i64 0, %57 + %59 = trunc i64 %shr.6.i.1 to i8 + %storedv.6.i.1 = and i8 %storedv.6.i, %59 + %shr.6.i.2 = 
lshr i64 0, %57 + %60 = trunc i64 %shr.6.i.2 to i8 + %storedv.6.i.2 = and i8 %storedv.6.i.1, %60 + %storedv.6.i.3 = and i8 %storedv.6.i.2, 0 + %shr.6.i.4 = lshr i64 0, %57 + %61 = trunc i64 %shr.6.i.4 to i8 + %storedv.6.i.4 = and i8 %storedv.6.i.3, %61 + %shr.6.i.5 = lshr i64 0, %57 + %62 = trunc i64 %shr.6.i.5 to i8 + %storedv.6.i.5 = and i8 %storedv.6.i.4, %62 + %shr.6.i.6 = lshr i64 0, %57 + %63 = trunc i64 %shr.6.i.6 to i8 + %storedv.6.i.6 = and i8 %storedv.6.i.5, %63 + %shr.6.i.7 = lshr i64 0, %57 + %64 = trunc i64 %shr.6.i.7 to i8 + %storedv.6.i.7 = and i8 %storedv.6.i.6, %64 + %storedv.6.i.8 = and i8 %storedv.6.i.7, 0 + %shr.6.i.9 = lshr i64 0, %57 + %65 = trunc i64 %shr.6.i.9 to i8 + %storedv.6.i.9 = and i8 %storedv.6.i.8, %65 + %shr.6.i.10 = lshr i64 0, %57 + %66 = trunc i64 %shr.6.i.10 to i8 + %storedv.6.i.10 = and i8 %storedv.6.i.9, %66 + %shr.6.i.11 = lshr i64 0, %57 + %67 = trunc i64 %shr.6.i.11 to i8 + %storedv.6.i.11 = and i8 %storedv.6.i.10, %67 + %shr.6.i.12 = lshr i64 0, %57 + %68 = trunc i64 %shr.6.i.12 to i8 + %storedv.6.i.12 = and i8 %storedv.6.i.11, %68 + %storedv.6.i.13 = and i8 %storedv.6.i.12, 0 + %shr.6.i.14 = lshr i64 0, 0 + %69 = trunc i64 %shr.6.i.14 to i8 + %storedv.6.i.14 = and i8 %storedv.6.i.13, %69 + store i8 %storedv.6.i.14, ptr null, align 1 + ret i32 0 +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll new file mode 100644 index 0000000000000..d822a24220df2 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s + +define i32 @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[IF_END_I87:%.*]] +; CHECK: if.end.i87: +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> poison, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP1]], <2 x i32> zeroinitializer, i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [ +; CHECK-NEXT: i32 1, label [[SW_BB509_I]] +; CHECK-NEXT: i32 0, label [[IF_THEN458_I:%.*]] +; CHECK-NEXT: ] +; CHECK: if.then458.i: +; CHECK-NEXT: br label [[SW_BB509_I]] +; CHECK: sw.bb509.i: +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP0]], [[IF_THEN458_I]] ], [ [[TMP3]], [[IF_END_I87]] ], [ [[TMP3]], [[IF_END_I87]] ] +; CHECK-NEXT: ret i32 0 +; +entry: + %getelementptr0 = getelementptr i8, ptr null, i64 64036 + %getelementptr1 = getelementptr i8, ptr null, i64 64064 + br label %if.end.i87 + +if.end.i87: ; preds = %entry + %0 = load <2 x i32>, ptr %getelementptr0, align 4 + %1 = load <2 x i32>, ptr %getelementptr1, align 8 + switch i32 0, label %sw.bb509.i [ + i32 1, label %sw.bb509.i + i32 0, label %if.then458.i + ] + +if.then458.i: ; preds = %if.end.i87 + br label %sw.bb509.i + +sw.bb509.i: ; preds = %if.then458.i, %if.end.i87, %if.end.i87 + %4 = phi <2 x i32> [ %0, %if.then458.i ], [ %0, %if.end.i87 ], [ %0, %if.end.i87 ] + %5 = phi <2 x i32> [ %1, %if.then458.i ], [ zeroinitializer, %if.end.i87 ], [ zeroinitializer, %if.end.i87 ] + 
ret i32 0 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll new file mode 100644 index 0000000000000..7e75970de3492 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test(i32 %g, i16 %d) { +; CHECK-LABEL: define i1 @test( +; CHECK-SAME: i32 [[G:%.*]], i16 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[D]], 1 +; CHECK-NEXT: [[XOR_I_I:%.*]] = xor i32 [[G]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[G]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[XOR_I_I]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP12]]) +; CHECK-NEXT: ret i1 [[TMP13]] +; +entry: + %0 = and i16 %d, 1 + %xor.i.i = xor i32 %g, 1 + %conv1.i.i = trunc i32 %xor.i.i to i8 + %notsub.i = add i8 %conv1.i.i, -1 + %cmp.i.i = icmp sgt i8 %notsub.i, -3 + %conv3.i.i = zext i1 %cmp.i.i to i32 + %cmp4.i.i = icmp sgt i32 %xor.i.i, %conv3.i.i + %conv1.1.i.i = trunc i32 %g to i8 + %notsub25.i = add i8 %conv1.1.i.i, -1 + %cmp.1.i.i = icmp sgt i8 %notsub25.i, -3 + %conv3.1.i.i = zext i1 %cmp.1.i.i to i32 + %cmp4.1.i.i = icmp sgt i32 %g, %conv3.1.i.i + %notsub26.i = add i8 %conv1.1.i.i, -9 + %cmp.i17.i = icmp sgt i8 %notsub26.i, -3 + %conv3.i18.i = zext i1 %cmp.i17.i to i32 + %cmp4.i19.i = icmp sgt i32 %g, %conv3.i18.i + %notsub27.i = add i8 %conv1.i.i, -9 + %cmp.1.i22.i = icmp sgt i8 %notsub27.i, -3 + %conv3.1.i23.i = zext i1 %cmp.1.i22.i to i32 + %cmp4.1.i24.i = icmp sgt i32 %xor.i.i, %conv3.1.i23.i + %1 = and i1 %cmp4.i19.i, %cmp4.1.i24.i + %2 = and i1 %cmp4.i.i, %1 + %3 = and i1 %cmp4.1.i.i, %2 + ret i1 %3 +} diff --git a/llvm/test/Verifier/rtsan-attrs.ll b/llvm/test/Verifier/rtsan-attrs.ll new file mode 100644 index 0000000000000..42ab85163642b --- /dev/null +++ b/llvm/test/Verifier/rtsan-attrs.ll @@ -0,0 +1,9 @@ +; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s + +; CHECK: Attributes 'sanitize_realtime and nosanitize_realtime' are incompatible! 
+; CHECK-NEXT: ptr @sanitize_nosanitize +define void @sanitize_nosanitize() #0 { + ret void +} + +attributes #0 = { sanitize_realtime nosanitize_realtime } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 5094871a1d415..b47c77c5f2ff3 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1183,11 +1183,9 @@ void ProfileGeneratorBase::extractProbesFromRange( do { const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap(); - auto It = Address2ProbesMap.find(IP.Address); - if (It != Address2ProbesMap.end()) { - for (const MCDecodedPseudoProbe &Probe : It->second) { - ProbeCounter[&Probe] += Count; - } + for (const MCDecodedPseudoProbe &Probe : + Address2ProbesMap.find(IP.Address)) { + ProbeCounter[&Probe] += Count; } } while (IP.advance() && IP.Address <= RangeEnd); } @@ -1293,9 +1291,9 @@ void CSProfileGenerator::populateBodySamplesWithProbes( // and will be inferred by the compiler. for (auto &I : FrameSamples) { for (auto *FunctionProfile : I.second) { - for (auto *Probe : I.first->getProbes()) { - FunctionProfile->addBodySamples(Probe->getIndex(), - Probe->getDiscriminator(), 0); + for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) { + FunctionProfile->addBodySamples(Probe.getIndex(), + Probe.getDiscriminator(), 0); } } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index a458ffcb96b41..e4fc3816cd0c4 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( MCPseudoProbeDecoder &ProbeDecoder) { ProbeFrameStack ProbeContext; for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext); } void BinarySizeContextTracker::trackInlineesOptimizedAway( @@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( // DFS down the probe inline tree for (const auto &ChildNode : ProbeNode.getChildren()) { - InlineSite Location = ChildNode.first; + InlineSite Location = ChildNode.getInlineSite(); ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext); } ProbeContext.pop_back(); @@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe // is available if (TrackFuncContextSize) { - for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { - auto *Frame = Child.second.get(); + for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { + auto *Frame = &Child; StringRef FuncName = ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; TopLevelProbeFrameMap[FuncName] = Frame; diff --git a/llvm/unittests/ADT/FunctionRefTest.cpp b/llvm/unittests/ADT/FunctionRefTest.cpp index 47633cada0f3c..b181933973037 100644 --- a/llvm/unittests/ADT/FunctionRefTest.cpp +++ b/llvm/unittests/ADT/FunctionRefTest.cpp @@ -59,4 +59,14 @@ TEST(FunctionRefTest, SFINAE) { EXPECT_EQ("string", returns([] { return "hello"; })); } +TEST(FunctionRefTest, Equality) { + function_ref X = [] { return 1; }; + function_ref Y = X; + EXPECT_EQ(X, Y); + + const auto Lambda = []() { 
return 0; }; + function_ref A(Lambda), B(Lambda); + EXPECT_EQ(A, B); +} + } // namespace diff --git a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h index fd31e95cce13d..544dcabb56d0a 100644 --- a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h +++ b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.h @@ -53,11 +53,9 @@ std::ostream & operator<<(std::ostream &OS, const MachineFunction &MF); } -static std::unique_ptr parseMIR(LLVMContext &Context, - std::unique_ptr &MIR, - const TargetMachine &TM, - StringRef MIRCode, const char *FuncName, - MachineModuleInfo &MMI) { +static std::unique_ptr +parseMIR(LLVMContext &Context, std::unique_ptr &MIR, + const TargetMachine &TM, StringRef MIRCode, MachineModuleInfo &MMI) { SMDiagnostic Diagnostic; std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); MIR = createMIRParser(std::move(MBuffer), Context); @@ -80,8 +78,7 @@ createDummyModule(LLVMContext &Context, const LLVMTargetMachine &TM, StringRef MIRString, const char *FuncName) { std::unique_ptr MIR; auto MMI = std::make_unique(&TM); - std::unique_ptr M = - parseMIR(Context, MIR, TM, MIRString, FuncName, *MMI); + std::unique_ptr M = parseMIR(Context, MIR, TM, MIRString, *MMI); return make_pair(std::move(M), std::move(MMI)); } diff --git a/llvm/unittests/CodeGen/MachineDomTreeUpdaterTest.cpp b/llvm/unittests/CodeGen/MachineDomTreeUpdaterTest.cpp index 9dcf3754a5bd7..f8505817d2e09 100644 --- a/llvm/unittests/CodeGen/MachineDomTreeUpdaterTest.cpp +++ b/llvm/unittests/CodeGen/MachineDomTreeUpdaterTest.cpp @@ -73,7 +73,7 @@ class MachineDomTreeUpdaterTest : public testing::Test { MAM.registerPass([&] { return MachineModuleAnalysis(*MMI); }); } - bool parseMIR(StringRef MIRCode, const char *FnName) { + bool parseMIR(StringRef MIRCode) { SMDiagnostic Diagnostic; std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); MIR = createMIRParser(std::move(MBuffer), Context); @@ -149,7 +149,7 @@ body: | ... )"; - ASSERT_TRUE(parseMIR(MIRString, "f0")); + ASSERT_TRUE(parseMIR(MIRString)); auto &MF = FAM.getResult(*M->getFunction("f0")).getMF(); @@ -239,7 +239,7 @@ body: | ... 
)"; - ASSERT_TRUE(parseMIR(MIRString, "f0")); + ASSERT_TRUE(parseMIR(MIRString)); auto &MF = FAM.getResult(*M->getFunction("f0")).getMF(); diff --git a/llvm/unittests/MI/LiveIntervalTest.cpp b/llvm/unittests/MI/LiveIntervalTest.cpp index bba5cb84d1152..7dcd82f3e7aa6 100644 --- a/llvm/unittests/MI/LiveIntervalTest.cpp +++ b/llvm/unittests/MI/LiveIntervalTest.cpp @@ -55,8 +55,10 @@ std::unique_ptr createTargetMachine() { } std::unique_ptr parseMIR(LLVMContext &Context, - legacy::PassManagerBase &PM, std::unique_ptr &MIR, - const LLVMTargetMachine &TM, StringRef MIRCode, const char *FuncName) { + legacy::PassManagerBase &PM, + std::unique_ptr &MIR, + const LLVMTargetMachine &TM, + StringRef MIRCode) { SMDiagnostic Diagnostic; std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); MIR = createMIRParser(std::move(MBuffer), Context); @@ -209,7 +211,7 @@ static void doTest(StringRef MIRFunc, legacy::PassManager PM; std::unique_ptr MIR; - std::unique_ptr M = parseMIR(Context, PM, MIR, *TM, MIRFunc, "func"); + std::unique_ptr M = parseMIR(Context, PM, MIR, *TM, MIRFunc); ASSERT_TRUE(M); PM.add(new TestPassT(T, ShouldPass)); diff --git a/llvm/unittests/MIR/MachineMetadata.cpp b/llvm/unittests/MIR/MachineMetadata.cpp index 9b1c3ef1c465a..cd30768f7ce76 100644 --- a/llvm/unittests/MIR/MachineMetadata.cpp +++ b/llvm/unittests/MIR/MachineMetadata.cpp @@ -79,7 +79,7 @@ class MachineMetadataTest : public testing::Test { } std::unique_ptr parseMIR(const TargetMachine &TM, StringRef MIRCode, - const char *FnName, MachineModuleInfo &MMI) { + MachineModuleInfo &MMI) { SMDiagnostic Diagnostic; std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); MIR = createMIRParser(std::move(MBuffer), Context); @@ -227,7 +227,7 @@ body: | )MIR"; MachineModuleInfo MMI(TM.get()); - M = parseMIR(*TM, MIRString, "test0", MMI); + M = parseMIR(*TM, MIRString, MMI); ASSERT_TRUE(M); auto *MF = MMI.getMachineFunction(*M->getFunction("test0")); @@ -338,7 +338,7 @@ body: | )MIR"; MachineModuleInfo MMI(TM.get()); - M = parseMIR(*TM, MIRString, "test0", MMI); + M = parseMIR(*TM, MIRString, MMI); ASSERT_TRUE(M); auto *MF = MMI.getMachineFunction(*M->getFunction("test0")); @@ -376,7 +376,7 @@ body: | )MIR"; MachineModuleInfo MMI(TM.get()); - M = parseMIR(*TM, MIRString, "test0", MMI); + M = parseMIR(*TM, MIRString, MMI); ASSERT_TRUE(M); auto *MF = MMI.getMachineFunction(*M->getFunction("test0")); @@ -474,7 +474,7 @@ body: | )MIR"; MachineModuleInfo MMI(TM.get()); - M = parseMIR(*TM, MIRString, "test0", MMI); + M = parseMIR(*TM, MIRString, MMI); ASSERT_TRUE(M); auto *MF = MMI.getMachineFunction(*M->getFunction("test0")); @@ -563,7 +563,7 @@ body: | ... )MIR"; MachineModuleInfo MMI(TM.get()); - M = parseMIR(*TM, MIRString, "foo", MMI); + M = parseMIR(*TM, MIRString, MMI); ASSERT_TRUE(M); auto *MF = MMI.getMachineFunction(*M->getFunction("foo")); MachineFunctionProperties &Properties = MF->getProperties(); @@ -594,7 +594,7 @@ body: | ... 
)MIR"; MachineModuleInfo MMI(TM.get()); - M = parseMIR(*TM, MIRString, "foo", MMI); + M = parseMIR(*TM, MIRString, MMI); ASSERT_TRUE(M); auto *MF = MMI.getMachineFunction(*M->getFunction("foo")); MachineFunctionProperties &Properties = MF->getProperties(); diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 83edd954080e9..869e752b23913 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -580,6 +580,81 @@ define void @foo(i8 %v1) { EXPECT_EQ(I0->getNextNode(), Ret); } +TEST_F(SandboxIRTest, FreezeInst) { + parseIR(C, R"IR( +define void @foo(i8 %arg) { + freeze i8 %arg + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *Arg = F->getArg(0); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Freeze = cast(&*It++); + auto *Ret = cast(&*It++); + + EXPECT_TRUE(isa(Freeze)); + EXPECT_EQ(Freeze->getOperand(0), Arg); + + // Check create(). + auto *NewFreeze = sandboxir::FreezeInst::create( + Arg, Ret->getIterator(), Ret->getParent(), Ctx, "NewFreeze"); + EXPECT_EQ(NewFreeze->getNextNode(), Ret); +#ifndef NDEBUG + EXPECT_EQ(NewFreeze->getName(), "NewFreeze"); +#endif // NDEBUG +} + +TEST_F(SandboxIRTest, FenceInst) { + parseIR(C, R"IR( +define void @foo() { + fence syncscope("singlethread") seq_cst + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + llvm::BasicBlock *LLVMBB = &*LLVMF->begin(); + auto *LLVMFence = cast(&*LLVMBB->begin()); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Fence = cast(&*It++); + auto *Ret = cast(&*It++); + + // Check getOrdering(). + EXPECT_EQ(Fence->getOrdering(), LLVMFence->getOrdering()); + // Check setOrdering(). + auto OrigOrdering = Fence->getOrdering(); + auto NewOrdering = AtomicOrdering::Release; + EXPECT_NE(NewOrdering, OrigOrdering); + Fence->setOrdering(NewOrdering); + EXPECT_EQ(Fence->getOrdering(), NewOrdering); + Fence->setOrdering(OrigOrdering); + EXPECT_EQ(Fence->getOrdering(), OrigOrdering); + // Check getSyncScopeID(). + EXPECT_EQ(Fence->getSyncScopeID(), LLVMFence->getSyncScopeID()); + // Check setSyncScopeID(). + auto OrigSSID = Fence->getSyncScopeID(); + auto NewSSID = SyncScope::System; + EXPECT_NE(NewSSID, OrigSSID); + Fence->setSyncScopeID(NewSSID); + EXPECT_EQ(Fence->getSyncScopeID(), NewSSID); + Fence->setSyncScopeID(OrigSSID); + EXPECT_EQ(Fence->getSyncScopeID(), OrigSSID); + // Check create(). 
+ auto *NewFence = + sandboxir::FenceInst::create(AtomicOrdering::Release, Ret->getIterator(), + BB, Ctx, SyncScope::SingleThread); + EXPECT_EQ(NewFence->getNextNode(), Ret); + EXPECT_EQ(NewFence->getOrdering(), AtomicOrdering::Release); + EXPECT_EQ(NewFence->getSyncScopeID(), SyncScope::SingleThread); +} + TEST_F(SandboxIRTest, SelectInst) { parseIR(C, R"IR( define void @foo(i1 %c0, i8 %v0, i8 %v1, i1 %c1) { @@ -1867,6 +1942,67 @@ define void @foo(i8 %arg) { } } +TEST_F(SandboxIRTest, LandingPadInst) { + parseIR(C, R"IR( +define void @foo() { +entry: + invoke void @foo() + to label %bb unwind label %unwind +unwind: + %lpad = landingpad { ptr, i32 } + catch ptr null + ret void +bb: + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + auto *LLVMUnwind = getBasicBlockByName(LLVMF, "unwind"); + auto *LLVMLPad = cast(&*LLVMUnwind->begin()); + + sandboxir::Context Ctx(C); + [[maybe_unused]] auto &F = *Ctx.createFunction(&LLVMF); + auto *Unwind = cast(Ctx.getValue(LLVMUnwind)); + auto *BB = cast( + Ctx.getValue(getBasicBlockByName(LLVMF, "bb"))); + auto It = Unwind->begin(); + auto *LPad = cast(&*It++); + [[maybe_unused]] auto *Ret = cast(&*It++); + + // Check isCleanup(). + EXPECT_EQ(LPad->isCleanup(), LLVMLPad->isCleanup()); + // Check setCleanup(). + auto OrigIsCleanup = LPad->isCleanup(); + auto NewIsCleanup = true; + EXPECT_NE(NewIsCleanup, OrigIsCleanup); + LPad->setCleanup(NewIsCleanup); + EXPECT_EQ(LPad->isCleanup(), NewIsCleanup); + LPad->setCleanup(OrigIsCleanup); + EXPECT_EQ(LPad->isCleanup(), OrigIsCleanup); + // Check getNumClauses(). + EXPECT_EQ(LPad->getNumClauses(), LLVMLPad->getNumClauses()); + // Check getClause(). + for (auto Idx : seq(0, LPad->getNumClauses())) + EXPECT_EQ(LPad->getClause(Idx), Ctx.getValue(LLVMLPad->getClause(Idx))); + // Check isCatch(). + for (auto Idx : seq(0, LPad->getNumClauses())) + EXPECT_EQ(LPad->isCatch(Idx), LLVMLPad->isCatch(Idx)); + // Check isFilter(). + for (auto Idx : seq(0, LPad->getNumClauses())) + EXPECT_EQ(LPad->isFilter(Idx), LLVMLPad->isFilter(Idx)); + // Check create(). + auto *BBRet = &*BB->begin(); + auto *NewLPad = + cast(sandboxir::LandingPadInst::create( + Type::getInt8Ty(C), 0, BBRet->getIterator(), BBRet->getParent(), Ctx, + "NewLPad")); + EXPECT_EQ(NewLPad->getNextNode(), BBRet); + EXPECT_FALSE(NewLPad->isCleanup()); +#ifndef NDEBUG + EXPECT_EQ(NewLPad->getName(), "NewLPad"); +#endif // NDEBUG +} + TEST_F(SandboxIRTest, FuncletPadInst_CatchPadInst_CleanupPadInst) { parseIR(C, R"IR( define void @foo() { @@ -2825,6 +2961,31 @@ define void @foo(i8 %arg0, i8 %arg1, float %farg0, float %farg1) { } } +TEST_F(SandboxIRTest, PossiblyDisjointInst) { + parseIR(C, R"IR( +define void @foo(i8 %arg0, i8 %arg1) { + %or = or i8 %arg0, %arg1 + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *PDI = cast(&*It++); + + // Check setIsDisjoint(), isDisjoint(). 
+ auto OrigIsDisjoint = PDI->isDisjoint(); + auto NewIsDisjoint = true; + EXPECT_NE(NewIsDisjoint, OrigIsDisjoint); + PDI->setIsDisjoint(NewIsDisjoint); + EXPECT_EQ(PDI->isDisjoint(), NewIsDisjoint); + PDI->setIsDisjoint(OrigIsDisjoint); + EXPECT_EQ(PDI->isDisjoint(), OrigIsDisjoint); +} + TEST_F(SandboxIRTest, AtomicRMWInst) { parseIR(C, R"IR( define void @foo(ptr %ptr, i8 %arg) { @@ -3521,6 +3682,48 @@ define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { } } +TEST_F(SandboxIRTest, PossiblyNonNegInst) { + parseIR(C, R"IR( +define void @foo(i32 %arg, float %farg, double %darg, ptr %ptr) { + %zext = zext i32 %arg to i64 + %uitofp = uitofp i32 %arg to float + + %sext = sext i32 %arg to i64 + %fptoui = fptoui float %farg to i32 + %fptosi = fptosi float %farg to i32 + %fpext = fpext float %farg to double + %ptrtoint = ptrtoint ptr %ptr to i32 + %inttoptr = inttoptr i32 %arg to ptr + %sitofp = sitofp i32 %arg to float + %trunc = trunc i32 %arg to i16 + %fptrunc = fptrunc double %darg to float + %bitcast = bitcast i32 %arg to float + %addrspacecast = addrspacecast ptr %ptr to ptr addrspace(1) + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(&LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *PNNI0 = cast(&*It++); + auto *PNNI1 = cast(&*It++); + for (auto ItE = BB->end(); It != ItE; ++It) + EXPECT_FALSE(isa(&*It++)); + + for (auto *PNNI : {PNNI0, PNNI1}) { + // Check setNonNeg(), hasNonNeg(). + auto OrigNonNeg = PNNI->hasNonNeg(); + auto NewNonNeg = true; + EXPECT_NE(NewNonNeg, OrigNonNeg); + PNNI->setNonNeg(NewNonNeg); + EXPECT_EQ(PNNI->hasNonNeg(), NewNonNeg); + PNNI->setNonNeg(OrigNonNeg); + EXPECT_EQ(PNNI->hasNonNeg(), OrigNonNeg); + } +} + /// CastInst's subclasses are very similar so we can use a common test function /// for them. template diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index f0d6a0d57b8c3..ca6effb727bf3 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -542,6 +542,40 @@ define void @foo(ptr %ptr) { EXPECT_EQ(It, BB->end()); } +TEST_F(TrackerTest, FenceInstSetters) { + parseIR(C, R"IR( +define void @foo() { + fence syncscope("singlethread") seq_cst + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Fence = cast(&*It++); + + // Check setOrdering(). + auto OrigOrdering = Fence->getOrdering(); + auto NewOrdering = AtomicOrdering::Release; + EXPECT_NE(NewOrdering, OrigOrdering); + Ctx.save(); + Fence->setOrdering(NewOrdering); + EXPECT_EQ(Fence->getOrdering(), NewOrdering); + Ctx.revert(); + EXPECT_EQ(Fence->getOrdering(), OrigOrdering); + // Check setSyncScopeID(). 
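  // Same pattern as setOrdering() above: the change made after Ctx.save() is
  // expected to be rolled back by Ctx.revert().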
+ auto OrigSSID = Fence->getSyncScopeID(); + auto NewSSID = SyncScope::System; + EXPECT_NE(NewSSID, OrigSSID); + Ctx.save(); + Fence->setSyncScopeID(NewSSID); + EXPECT_EQ(Fence->getSyncScopeID(), NewSSID); + Ctx.revert(); + EXPECT_EQ(Fence->getSyncScopeID(), OrigSSID); +} + TEST_F(TrackerTest, CallBaseSetters) { parseIR(C, R"IR( declare void @bar1(i8) @@ -713,6 +747,41 @@ define void @foo(i32 %cond0, i32 %cond1) { EXPECT_EQ(*HIt++, Handler1); } +TEST_F(TrackerTest, LandingPadInstSetters) { + parseIR(C, R"IR( +define void @foo() { +entry: + invoke void @foo() + to label %bb unwind label %unwind +unwind: + %lpad = landingpad { ptr, i32 } + catch ptr null + ret void +bb: + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + auto *LLVMUnwind = getBasicBlockByName(LLVMF, "unwind"); + + sandboxir::Context Ctx(C); + [[maybe_unused]] auto &F = *Ctx.createFunction(&LLVMF); + auto *Unwind = cast(Ctx.getValue(LLVMUnwind)); + auto It = Unwind->begin(); + auto *LPad = cast(&*It++); + [[maybe_unused]] auto *Ret = cast(&*It++); + + // Check setCleanup(). + auto OrigIsCleanup = LPad->isCleanup(); + auto NewIsCleanup = true; + EXPECT_NE(NewIsCleanup, OrigIsCleanup); + Ctx.save(); + LPad->setCleanup(NewIsCleanup); + EXPECT_EQ(LPad->isCleanup(), NewIsCleanup); + Ctx.revert(); + EXPECT_EQ(LPad->isCleanup(), OrigIsCleanup); +} + TEST_F(TrackerTest, CatchReturnInstSetters) { parseIR(C, R"IR( define void @foo() { @@ -919,6 +988,58 @@ define void @foo(<2 x i8> %v1, <2 x i8> %v2) { EXPECT_THAT(SVI->getShuffleMask(), testing::ElementsAreArray(OrigMask)); } +TEST_F(TrackerTest, PossiblyDisjointInstSetters) { + parseIR(C, R"IR( +define void @foo(i8 %arg0, i8 %arg1) { + %or = or i8 %arg0, %arg1 + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *PDI = cast(&*It++); + + // Check setIsDisjoint(). + auto OrigIsDisjoint = PDI->isDisjoint(); + auto NewIsDisjoint = true; + EXPECT_NE(NewIsDisjoint, OrigIsDisjoint); + Ctx.save(); + PDI->setIsDisjoint(NewIsDisjoint); + EXPECT_EQ(PDI->isDisjoint(), NewIsDisjoint); + Ctx.revert(); + EXPECT_EQ(PDI->isDisjoint(), OrigIsDisjoint); +} + +TEST_F(TrackerTest, PossiblyNonNegInstSetters) { + parseIR(C, R"IR( +define void @foo(i32 %arg) { + %zext = zext i32 %arg to i64 + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *PNNI = cast(&*It++); + + // Check setNonNeg(). 
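  // `nneg` (here on a zext) asserts that the operand is non-negative; the
  // tracker is expected to restore the original flag value on Ctx.revert().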
+ auto OrigNonNeg = PNNI->hasNonNeg(); + auto NewNonNeg = true; + EXPECT_NE(NewNonNeg, OrigNonNeg); + Ctx.save(); + PNNI->setNonNeg(NewNonNeg); + EXPECT_EQ(PNNI->hasNonNeg(), NewNonNeg); + Ctx.revert(); + EXPECT_EQ(PNNI->hasNonNeg(), OrigNonNeg); +} + TEST_F(TrackerTest, AtomicRMWSetters) { parseIR(C, R"IR( define void @foo(ptr %ptr, i8 %arg) { diff --git a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp index 5838ab6f782ba..55caaf5d13b6c 100644 --- a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp +++ b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp @@ -43,7 +43,7 @@ std::unique_ptr createTargetMachine() { std::unique_ptr parseMIR(LLVMContext &Context, std::unique_ptr &MIR, const TargetMachine &TM, StringRef MIRCode, - const char *FuncName, MachineModuleInfo &MMI) { + MachineModuleInfo &MMI) { SMDiagnostic Diagnostic; std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); MIR = createMIRParser(std::move(MBuffer), Context); @@ -157,8 +157,7 @@ body: | LLVMContext Context; std::unique_ptr MIR; MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = - parseMIR(Context, MIR, *TM, MIRString, "test0", MMI); + std::unique_ptr M = parseMIR(Context, MIR, *TM, MIRString, MMI); ASSERT_TRUE(M); Function *F = M->getFunction("test0"); @@ -332,8 +331,7 @@ body: | LLVMContext Context; std::unique_ptr MIR; MachineModuleInfo MMI(TM.get()); - std::unique_ptr M = - parseMIR(Context, MIR, *TM, MIRString, "test1", MMI); + std::unique_ptr M = parseMIR(Context, MIR, *TM, MIRString, MMI); ASSERT_TRUE(M); Function *F = M->getFunction("test1"); diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 3b630e3cf014e..f351087ad212f 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -1013,7 +1013,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info, InTok = false; IsIsolatedToken = false; } - addAsmOperand(String.slice(i, i + 1), IsIsolatedToken); + addAsmOperand(String.substr(i, 1), IsIsolatedToken); Prev = i + 1; IsIsolatedToken = true; continue; @@ -1037,7 +1037,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info, } ++i; assert(i != String.size() && "Invalid quoted character"); - addAsmOperand(String.slice(i, i + 1), IsIsolatedToken); + addAsmOperand(String.substr(i, 1), IsIsolatedToken); Prev = i + 1; IsIsolatedToken = false; break; diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index ec6ef56a66fa0..6a3030bfc1b7e 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -160,7 +160,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); - O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name + O << IndentStr << "if (MCRegister Reg = State.AllocateReg(" << Name << ")) {\n"; if (SwiftAction) AssignedSwiftRegsMap[CurrentAction].insert(Name); @@ -180,7 +180,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, O << LS << Name; } O << "\n" << IndentStr << "};\n"; - O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" + O << IndentStr << "if (MCRegister Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; } O << IndentStr << " 
State.addLoc(CCValAssign::getReg(ValNo, ValVT, " @@ -217,7 +217,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, "Invalid length of list of shadowed registers"); if (RegList->size() == 1) { - O << IndentStr << "if (unsigned Reg = State.AllocateReg("; + O << IndentStr << "if (MCRegister Reg = State.AllocateReg("; O << getQualifiedName(RegList->getElementAsRecord(0)); O << ", " << getQualifiedName(ShadowRegList->getElementAsRecord(0)); O << ")) {\n"; @@ -241,7 +241,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, O << LSS << getQualifiedName(ShadowRegList->getElementAsRecord(i)); O << "\n" << IndentStr << "};\n"; - O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" + O << IndentStr << "if (MCRegister Reg = State.AllocateReg(RegList" << RegListNumber << ", " << "RegList" << ShadowRegListNumber << ")) {\n"; } diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp index d41750075b41f..f2c25d38ad2a7 100644 --- a/llvm/utils/TableGen/DisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp @@ -11,6 +11,7 @@ #include "WebAssemblyDisassemblerEmitter.h" #include "X86DisassemblerTables.h" #include "X86RecognizableInstr.h" +#include "llvm/Support/CommandLine.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 8e536c99f627f..4c211cdca84c5 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -65,6 +65,14 @@ class IntrinsicEmitter { void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS); }; + +// Helper class to use with `TableGen::Emitter::OptClass`. 
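// `OptClass` presumably requires a type that is constructible from a
// `RecordKeeper` and exposes `run(raw_ostream &)`; this shim forwards to
// IntrinsicEmitter::run() with the bool template parameter choosing between
// the enum and implementation outputs registered below.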
+template class IntrinsicEmitterOpt : public IntrinsicEmitter { +public: + IntrinsicEmitterOpt(const RecordKeeper &R) : IntrinsicEmitter(R) {} + void run(raw_ostream &OS) { IntrinsicEmitter::run(OS, Enums); } +}; + } // End anonymous namespace //===----------------------------------------------------------------------===// @@ -770,16 +778,8 @@ Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix, UpperCompilerName); } -static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) { - IntrinsicEmitter(RK).run(OS, /*Enums=*/true); -} - -static TableGen::Emitter::Opt X("gen-intrinsic-enums", EmitIntrinsicEnums, - "Generate intrinsic enums"); - -static void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS) { - IntrinsicEmitter(RK).run(OS, /*Enums=*/false); -} +static TableGen::Emitter::OptClass> + X("gen-intrinsic-enums", "Generate intrinsic enums"); -static TableGen::Emitter::Opt Y("gen-intrinsic-impl", EmitIntrinsicImpl, - "Generate intrinsic implementation code"); +static TableGen::Emitter::OptClass> + Y("gen-intrinsic-impl", "Generate intrinsic implementation code"); diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index c420843574cbf..7ee6fa5c83211 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -39,7 +39,7 @@ static cl::opt Class("class", cl::value_desc("class name"), cl::cat(PrintEnumsCat)); -static void PrintRecords(RecordKeeper &Records, raw_ostream &OS) { +static void PrintRecords(const RecordKeeper &Records, raw_ostream &OS) { OS << Records; // No argument, dump all contents } @@ -49,14 +49,14 @@ static void PrintEnums(RecordKeeper &Records, raw_ostream &OS) { OS << "\n"; } -static void PrintSets(RecordKeeper &Records, raw_ostream &OS) { +static void PrintSets(const RecordKeeper &Records, raw_ostream &OS) { SetTheory Sets; Sets.addFieldExpander("Set", "Elements"); for (Record *Rec : Records.getAllDerivedDefinitions("Set")) { OS << Rec->getName() << " = ["; const std::vector *Elts = Sets.expand(Rec); assert(Elts && "Couldn't expand Set instance"); - for (Record *Elt : *Elts) + for (const Record *Elt : *Elts) OS << ' ' << Elt->getName(); OS << " ]\n"; } @@ -67,7 +67,7 @@ static TableGen::Emitter::Opt X[] = { true}, {"print-detailed-records", EmitDetailedRecords, "Print full details of all records to stdout"}, - {"null-backend", [](RecordKeeper &Records, raw_ostream &OS) {}, + {"null-backend", [](const RecordKeeper &Records, raw_ostream &OS) {}, "Do nothing after parsing (useful for timing)"}, {"dump-json", EmitJSON, "Dump all records as machine-readable JSON"}, {"print-enums", PrintEnums, "Print enum values for a class"}, diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index 1dd34f2a077bc..0428322e07e3e 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -62,6 +62,32 @@ static_library("AST") { "AttrDocTable.cpp", "AttrImpl.cpp", "Availability.cpp", + "ByteCode/ByteCodeEmitter.cpp", + "ByteCode/Compiler.cpp", + "ByteCode/Context.cpp", + "ByteCode/Descriptor.cpp", + "ByteCode/Disasm.cpp", + "ByteCode/DynamicAllocator.cpp", + "ByteCode/EvalEmitter.cpp", + "ByteCode/EvaluationResult.cpp", + "ByteCode/Floating.cpp", + "ByteCode/Frame.cpp", + "ByteCode/Function.cpp", + "ByteCode/FunctionPointer.cpp", + "ByteCode/Interp.cpp", + "ByteCode/InterpBlock.cpp", + "ByteCode/InterpBuiltin.cpp", + "ByteCode/InterpFrame.cpp", + "ByteCode/InterpShared.cpp", + "ByteCode/InterpStack.cpp", + 
"ByteCode/InterpState.cpp", + "ByteCode/MemberPointer.cpp", + "ByteCode/Pointer.cpp", + "ByteCode/PrimType.cpp", + "ByteCode/Program.cpp", + "ByteCode/Record.cpp", + "ByteCode/Source.cpp", + "ByteCode/State.cpp", "CXXInheritance.cpp", "Comment.cpp", "CommentBriefParser.cpp", @@ -92,31 +118,6 @@ static_library("AST") { "ExternalASTSource.cpp", "FormatString.cpp", "InheritViz.cpp", - "ByteCode/ByteCodeEmitter.cpp", - "ByteCode/Compiler.cpp", - "ByteCode/Context.cpp", - "ByteCode/Descriptor.cpp", - "ByteCode/Disasm.cpp", - "ByteCode/DynamicAllocator.cpp", - "ByteCode/EvalEmitter.cpp", - "ByteCode/EvaluationResult.cpp", - "ByteCode/Floating.cpp", - "ByteCode/Frame.cpp", - "ByteCode/Function.cpp", - "ByteCode/Interp.cpp", - "ByteCode/InterpBlock.cpp", - "ByteCode/InterpBuiltin.cpp", - "ByteCode/InterpFrame.cpp", - "ByteCode/InterpShared.cpp", - "ByteCode/InterpStack.cpp", - "ByteCode/InterpState.cpp", - "ByteCode/MemberPointer.cpp", - "ByteCode/Pointer.cpp", - "ByteCode/PrimType.cpp", - "ByteCode/Program.cpp", - "ByteCode/Record.cpp", - "ByteCode/Source.cpp", - "ByteCode/State.cpp", "ItaniumCXXABI.cpp", "ItaniumMangle.cpp", "JSONNodeDumper.cpp", diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn index 8cb60fd81840f..b627b1ecc2548 100644 --- a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn @@ -45,6 +45,7 @@ executable("lldb-dap") { "FifoFiles.cpp", "FunctionBreakpoint.cpp", "IOStream.cpp", + "InstructionBreakpoint.cpp", "JSONUtils.cpp", "LLDBUtils.cpp", "OutputRedirector.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn index 006e1ed700b82..dd4af4e98832f 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -166,7 +166,6 @@ static_library("LLVMAMDGPUCodeGen") { "AMDGPULowerModuleLDSPass.cpp", "AMDGPUMCInstLower.cpp", "AMDGPUMIRFormatter.cpp", - "AMDGPUMachineCFGStructurizer.cpp", "AMDGPUMachineFunction.cpp", "AMDGPUMachineModuleInfo.cpp", "AMDGPUMacroFusion.cpp", @@ -189,6 +188,7 @@ static_library("LLVMAMDGPUCodeGen") { "AMDGPUSetWavePriority.cpp", "AMDGPUSplitModule.cpp", "AMDGPUSubtarget.cpp", + "AMDGPUSwLowerLDS.cpp", "AMDGPUTargetMachine.cpp", "AMDGPUTargetObjectFile.cpp", "AMDGPUTargetTransformInfo.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn index 83b55be3f9208..4ebeaff28c14a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn @@ -68,7 +68,6 @@ static_library("LLVMPowerPCCodeGen") { "PPCCallingConv.cpp", "PPCEarlyReturn.cpp", "PPCExpandAtomicPseudoInsts.cpp", - "PPCExpandISEL.cpp", "PPCFastISel.cpp", "PPCFrameLowering.cpp", "PPCGenScalarMASSEntries.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn index 58acabf85d296..55e25e0fe5b79 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn @@ -24,6 +24,7 @@ static_library("Utils") { "CodeExtractor.cpp", "CodeLayout.cpp", "CodeMoverUtils.cpp", + "ControlFlowUtils.cpp", "CountVisits.cpp", "CtorUtils.cpp", "DXILUpgrade.cpp", diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 
4dad1412436d9..e3ca9b1fb32cc 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -356,7 +356,7 @@ def executeBuiltinPopd(cmd, shenv): def executeBuiltinExport(cmd, shenv): """executeBuiltinExport - Set an environment variable.""" if len(cmd.args) != 2: - raise InternalShellError("'export' supports only one argument") + raise InternalShellError(cmd, "'export' supports only one argument") updateEnv(shenv, cmd.args) return ShellCommandResult(cmd, "", "", 0, False) diff --git a/llvm/utils/lit/tests/Inputs/shtest-export/export-too-many-args.txt b/llvm/utils/lit/tests/Inputs/shtest-export/export-too-many-args.txt new file mode 100644 index 0000000000000..b282e1a176498 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-export/export-too-many-args.txt @@ -0,0 +1,2 @@ +## Test export command with too many arguments. +# RUN: export FOO=1 BAR=2 diff --git a/llvm/utils/lit/tests/Inputs/shtest-export/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-export/lit.cfg new file mode 100644 index 0000000000000..22ddf13ea3857 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-export/lit.cfg @@ -0,0 +1,7 @@ +import lit.formats + +config.name = "shtest-export" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None diff --git a/llvm/utils/lit/tests/shtest-export.py b/llvm/utils/lit/tests/shtest-export.py new file mode 100644 index 0000000000000..f2de8e8cd8b5f --- /dev/null +++ b/llvm/utils/lit/tests/shtest-export.py @@ -0,0 +1,12 @@ +## Test the export command. + +# RUN: not %{lit} -a -v %{inputs}/shtest-export \ +# RUN: | FileCheck -match-full-lines %s +# +# END. + +# CHECK: FAIL: shtest-export :: export-too-many-args.txt {{.*}} +# CHECK: export FOO=1 BAR=2 +# CHECK: # executed command: export FOO=1 BAR=2 +# CHECK: # | 'export' supports only one argument +# CHECK: # error: command failed with exit status: {{.*}} diff --git a/mlir/docs/Dialects/emitc.md b/mlir/docs/Dialects/emitc.md index 4b0394606e4a2..743d70959f3d8 100644 --- a/mlir/docs/Dialects/emitc.md +++ b/mlir/docs/Dialects/emitc.md @@ -12,6 +12,10 @@ The following convention is followed: operation, C++20 is required. * If `ssize_t` is used, then the code requires the POSIX header `sys/types.h` or any of the C++ headers in which the type is defined. +* If `_Float16` is used, the code requires the support of C additional + floating types. +* If `__bf16` is used, the code requires a compiler that supports it, such as + GCC or Clang. * Else the generated code is compatible with C99. These restrictions are neither inherent to the EmitC dialect itself nor to the diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index 631b564618320..5eb96a86e472d 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -316,7 +316,8 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDISubprogramAttrGet( MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit, MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, MlirAttribute file, unsigned int line, unsigned int scopeLine, - uint64_t subprogramFlags, MlirAttribute type); + uint64_t subprogramFlags, MlirAttribute type, intptr_t nRetainedNodes, + MlirAttribute const *retainedNodes); /// Gets the scope from this DISubprogramAttr. 
MLIR_CAPI_EXPORTED MlirAttribute @@ -353,6 +354,12 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIModuleAttrGet( MlirAttribute name, MlirAttribute configMacros, MlirAttribute includePath, MlirAttribute apinotes, unsigned int line, bool isDecl); +/// Creates a LLVM DIImportedEntityAttr attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIImportedEntityAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute entity, MlirAttribute file, + unsigned int line, MlirAttribute name, intptr_t nElements, + MlirAttribute const *elements); + /// Gets the scope of this DIModuleAttr. MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule); diff --git a/mlir/include/mlir/Bindings/Python/IRTypes.h b/mlir/include/mlir/Bindings/Python/IRTypes.h new file mode 100644 index 0000000000000..9afad4c23b3f3 --- /dev/null +++ b/mlir/include/mlir/Bindings/Python/IRTypes.h @@ -0,0 +1,31 @@ +//===- IRTypes.h - Type Interfaces ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_BINDINGS_PYTHON_IRTYPES_H +#define MLIR_BINDINGS_PYTHON_IRTYPES_H + +#include "mlir/Bindings/Python/PybindAdaptors.h" + +namespace mlir { + +/// Shaped Type Interface - ShapedType +class PyShapedType : public python::PyConcreteType { +public: + static const IsAFunctionTy isaFunction; + static constexpr const char *pyClassName = "ShapedType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c); + +private: + void requireHasRank(); +}; + +} // namespace mlir + +#endif // MLIR_BINDINGS_PYTHON_IRTYPES_H diff --git a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h index 78c79c915e060..28fdc234e5ef0 100644 --- a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h +++ b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h @@ -9,7 +9,9 @@ #ifndef MLIR_CONVERSION_ARITHTOAMDGPU_ARITHTOAMDGPU_H #define MLIR_CONVERSION_ARITHTOAMDGPU_ARITHTOAMDGPU_H +#include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include +#include namespace mlir { @@ -26,7 +28,10 @@ namespace arith { /// to the largest value of that type instead of being rewritten to Inf (aka /// NaN). 
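/// A typical call, mirroring ArithToAMDGPUConversionPass::runOnOperation()
/// later in this patch (sketch): parse the chipset string, decide whether FP8
/// arithmetic is legal on it, then do
///   arith::populateArithToAMDGPUConversionPatterns(
///       patterns, convertFP8Arithmetic, saturateFP8Truncf, allowPackedF16Rtz,
///       *maybeChipset);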
void populateArithToAMDGPUConversionPatterns(RewritePatternSet &patterns, - bool saturateFP8TruncF); + bool convertFP8Arithmetic, + bool saturateFP8Truncf, + bool allowPackedF16Rtz, + amdgpu::Chipset chipset); } // namespace arith } // namespace mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 7bde9e490e4f4..383e7dca0429c 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -150,9 +150,15 @@ def ArithToAMDGPUConversionPass : Pass<"convert-arith-to-amdgpu"> { let dependentDialects = ["amdgpu::AMDGPUDialect", "vector::VectorDialect"]; let options = [ + Option<"chipset", "chipset", "std::string", + /*default=*/"\"gfx000\"", + "Chipset that these operations will run on">, Option<"saturateFP8Truncf", "saturate-fp8-truncf", "bool", /*default=*/"false", "Use saturating truncation for 8-bit float types">, + Option<"allowPackedF16Rtz", "allow-packed-f16-round-to-zero", "bool", + /*default=*/"false", + "Whether we should allow f32->f16 packed round-to-zero conversion">, ]; } diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 97e0580c89808..e5c1a53f34bf6 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -25,6 +25,7 @@ def AMDGPU_Dialect : Dialect { let dependentDialects = [ + "ROCDL::ROCDLDialect", "arith::ArithDialect", "gpu::GPUDialect" ]; diff --git a/mlir/include/mlir/Dialect/DLTI/DLTI.h b/mlir/include/mlir/Dialect/DLTI/DLTI.h index a97eb523cb063..f268fea340a6f 100644 --- a/mlir/include/mlir/Dialect/DLTI/DLTI.h +++ b/mlir/include/mlir/Dialect/DLTI/DLTI.h @@ -26,7 +26,7 @@ namespace mlir { namespace dlti { /// Perform a DLTI-query at `op`, recursively querying each key of `keys` on /// query interface-implementing attrs, starting from attr obtained from `op`. 
-FailureOr query(Operation *op, ArrayRef keys, +FailureOr query(Operation *op, ArrayRef keys, bool emitError = false); } // namespace dlti } // namespace mlir diff --git a/mlir/include/mlir/Dialect/DLTI/TransformOps/DLTITransformOps.td b/mlir/include/mlir/Dialect/DLTI/TransformOps/DLTITransformOps.td index 1b1bebfaab4e3..f25bb383912d4 100644 --- a/mlir/include/mlir/Dialect/DLTI/TransformOps/DLTITransformOps.td +++ b/mlir/include/mlir/Dialect/DLTI/TransformOps/DLTITransformOps.td @@ -26,9 +26,10 @@ def QueryOp : Op:$line, OptionalParameter<"unsigned">:$scopeLine, OptionalParameter<"DISubprogramFlags">:$subprogramFlags, - OptionalParameter<"DISubroutineTypeAttr">:$type + OptionalParameter<"DISubroutineTypeAttr">:$type, + OptionalArrayRefParameter<"DINodeAttr">:$retainedNodes ); let builders = [ AttrBuilderWithInferredContext<(ins "DistinctAttr":$id, "DICompileUnitAttr":$compileUnit, "DIScopeAttr":$scope, "StringRef":$name, "StringRef":$linkageName, "DIFileAttr":$file, "unsigned":$line, "unsigned":$scopeLine, - "DISubprogramFlags":$subprogramFlags, "DISubroutineTypeAttr":$type + "DISubprogramFlags":$subprogramFlags, "DISubroutineTypeAttr":$type, + "ArrayRef":$retainedNodes ), [{ MLIRContext *ctx = file.getContext(); return $_get(ctx, id, compileUnit, scope, StringAttr::get(ctx, name), StringAttr::get(ctx, linkageName), file, line, - scopeLine, subprogramFlags, type); + scopeLine, subprogramFlags, type, retainedNodes); }]> ]; @@ -619,6 +621,29 @@ def LLVM_DINamespaceAttr : LLVM_Attr<"DINamespace", "di_namespace", let assemblyFormat = "`<` struct(params) `>`"; } +//===----------------------------------------------------------------------===// +// DIImportedEntityAttr +//===----------------------------------------------------------------------===// + +def LLVM_DIImportedEntityAttr : LLVM_Attr<"DIImportedEntity", "di_imported_entity", + /*traits=*/[], "DINodeAttr"> { + /// TODO: DIImportedEntity has a 'scope' field which represents the scope where + /// this entity is imported. Currently, we are not adding a 'scope' field in + /// DIImportedEntityAttr to avoid cyclic dependency. As DIImportedEntityAttr + /// entries will be contained inside a scope entity (e.g. DISubprogramAttr), + /// the scope can easily be inferred. + let parameters = (ins + LLVM_DITagParameter:$tag, + "DINodeAttr":$entity, + OptionalParameter<"DIFileAttr">:$file, + OptionalParameter<"unsigned">:$line, + OptionalParameter<"StringAttr">:$name, + OptionalArrayRefParameter<"DINodeAttr">:$elements + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + //===----------------------------------------------------------------------===// // DISubrangeAttr //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index a1e6fc3e29900..e832dfa9d6b80 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -166,7 +166,7 @@ def ROCDL_BallotOp : let summary = "Vote across thread group"; let description = [{ - Ballot provides a bit mask containing the 1-bit predicate value from each lane. + Ballot provides a bit mask containing the 1-bit predicate value from each lane. The nth bit of the result contains the 1 bit contributed by the nth warp lane. 
}]; @@ -579,6 +579,21 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0], }]; } +//===---------------------------------------------------------------------===// +// 16-bit float intrinsics +//===---------------------------------------------------------------------===// +def ROCDL_CvtPkRtz: + ROCDL_IntrOp<"cvt.pkrtz", [], [], [Pure], 1>, + Arguments<(ins F32:$srcA, F32:$srcB)> { + let summary = "Convert two f32 input into a vector<2xf16>"; + let description = [{ + Convert two f32 values into a packed vector<2xf16>. + }]; + let assemblyFormat = [{ + attr-dict $srcA `,` $srcB `:` type($res) + }]; +} + //===---------------------------------------------------------------------===// // 8-bit float intrinsics //===---------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h index 08afdf373f014..0fcaa96ade403 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h @@ -110,8 +110,12 @@ struct ConvolutionDimensions { FailureOr inferConvolutionDims(LinalgOp linalgOp); /// Checks whether `linalgOp` conforms to ConvolutionOpInterface. +/// By default, we require the `linalgOp` to have non-empty convolved dims +/// (implicitly non-empty `output_image` and `filter_loop`). +/// Users can loosen the constraint by setting `allowEmptyConvolvedDims` to true // TODO: embed within `isa` if possible / natural. -bool isaConvolutionOpInterface(LinalgOp linalgOp); +bool isaConvolutionOpInterface(LinalgOp linalgOp, + bool allowEmptyConvolvedDims = false); /// Checks whether `linalgOp` is semantically equivalent to a `linalg.copyOp`. bool isaCopyOpInterface(LinalgOp linalgOp); @@ -175,9 +179,12 @@ enum class MatchConvolutionResult; /// Checks whether `op` conforms to ConvolutionOpInterface and populates /// `dimensions` with indexes of the different kinds of dimensions when /// present. +/// If `allowEmptyConvolvedDims` is not set, we further checks whether the `op` +/// contains convolved dims. MatchConvolutionResult isConvolutionInterfaceImpl(Operation *op, - ConvolutionDimensions *dimensions = nullptr); + ConvolutionDimensions *dimensions = nullptr, + bool allowEmptyConvolvedDims = false); /// Returns the error message corresponding to the convolution checking return /// code. diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h index 90021ffa7c380..efbe5c56a219b 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h @@ -162,7 +162,7 @@ struct SparsifierOptions : public PassPipelineOptions { } /// Projects out the options for `createConvertVectorToLLVMPass`. 
- ConvertVectorToLLVMPassOptions lowerVectorToLLVMOptions() const { + ConvertVectorToLLVMPassOptions convertVectorToLLVMOptions() const { ConvertVectorToLLVMPassOptions opts{}; opts.reassociateFPReductions = reassociateFPReductions; opts.force32BitVectorIndices = force32BitVectorIndices; diff --git a/mlir/include/mlir/Support/ThreadLocalCache.h b/mlir/include/mlir/Support/ThreadLocalCache.h index 87cc52cc56ac4..53b6d31a09555 100644 --- a/mlir/include/mlir/Support/ThreadLocalCache.h +++ b/mlir/include/mlir/Support/ThreadLocalCache.h @@ -27,6 +27,8 @@ template class ThreadLocalCache { struct PerInstanceState; + using PointerAndFlag = std::pair>; + /// The "observer" is owned by a thread-local cache instance. It is /// constructed the first time a `ThreadLocalCache` instance is accessed by a /// thread, unless `perInstanceState` happens to get re-allocated to the same @@ -41,7 +43,8 @@ class ThreadLocalCache { /// This is the double pointer, explicitly allocated because we need to keep /// the address stable if the TLC map re-allocates. It is owned by the /// observer and shared with the value owner. - std::shared_ptr ptr = std::make_shared(nullptr); + std::shared_ptr ptr = + std::make_shared(std::make_pair(nullptr, false)); /// Because the `Owner` instance that lives inside `PerInstanceState` /// contains a reference to the double pointer, and likewise this class /// contains a reference to the value, we need to synchronize destruction of @@ -62,18 +65,21 @@ class ThreadLocalCache { /// Save a pointer to the reference and write it to the newly created entry. Owner(Observer &observer) : value(std::make_unique()), ptrRef(observer.ptr) { - *observer.ptr = value.get(); + observer.ptr->second = true; + observer.ptr->first = value.get(); } ~Owner() { - if (std::shared_ptr ptr = ptrRef.lock()) - *ptr = nullptr; + if (std::shared_ptr ptr = ptrRef.lock()) { + ptr->first = nullptr; + ptr->second = false; + } } Owner(Owner &&) = default; Owner &operator=(Owner &&) = default; std::unique_ptr value; - std::weak_ptr ptrRef; + std::weak_ptr ptrRef; }; // Keep a separate shared_ptr protected state that can be acquired atomically @@ -116,7 +122,7 @@ class ThreadLocalCache { // back to the data here that is being destroyed. for (auto &[instance, observer] : *this) if (std::shared_ptr state = observer.keepalive.lock()) - state->remove(*observer.ptr); + state->remove(observer.ptr->first); } /// Clear out any unused entries within the map. This method is not @@ -124,7 +130,7 @@ class ThreadLocalCache { void clearExpiredEntries() { for (auto it = this->begin(), e = this->end(); it != e;) { auto curIt = it++; - if (!*curIt->second.ptr) + if (!curIt->second.ptr->second) this->erase(curIt); } } @@ -142,7 +148,7 @@ class ThreadLocalCache { // Check for an already existing instance for this thread. CacheType &staticCache = getStaticCache(); Observer &threadInstance = staticCache[perInstanceState.get()]; - if (ValueT *value = *threadInstance.ptr) + if (ValueT *value = threadInstance.ptr->first) return *value; // Otherwise, create a new instance for this thread. @@ -157,7 +163,7 @@ class ThreadLocalCache { // entries in the static map. The cache is only cleared within the same // thread to remove the need to lock the cache itself. 
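    // With the PointerAndFlag change above, the bool flag records whether the
    // entry's Owner is still alive, independently of the value pointer, which
    // is what clearExpiredEntries() now keys off.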
staticCache.clearExpiredEntries(); - return **threadInstance.ptr; + return *threadInstance.ptr->first; } ValueT &operator*() { return get(); } ValueT *operator->() { return &get(); } diff --git a/mlir/lib/Bindings/Python/IRTypes.cpp b/mlir/lib/Bindings/Python/IRTypes.cpp index c3d42c0ef8e3c..2ee1d89c38884 100644 --- a/mlir/lib/Bindings/Python/IRTypes.cpp +++ b/mlir/lib/Bindings/Python/IRTypes.cpp @@ -10,6 +10,8 @@ #include "PybindUtils.h" +#include "mlir/Bindings/Python/IRTypes.h" + #include "mlir-c/BuiltinAttributes.h" #include "mlir-c/BuiltinTypes.h" #include "mlir-c/Support.h" @@ -418,98 +420,98 @@ class PyComplexType : public PyConcreteType { } }; -class PyShapedType : public PyConcreteType { -public: - static constexpr IsAFunctionTy isaFunction = mlirTypeIsAShaped; - static constexpr const char *pyClassName = "ShapedType"; - using PyConcreteType::PyConcreteType; +} // namespace - static void bindDerived(ClassTy &c) { - c.def_property_readonly( - "element_type", - [](PyShapedType &self) { return mlirShapedTypeGetElementType(self); }, - "Returns the element type of the shaped type."); - c.def_property_readonly( - "has_rank", - [](PyShapedType &self) -> bool { return mlirShapedTypeHasRank(self); }, - "Returns whether the given shaped type is ranked."); - c.def_property_readonly( - "rank", - [](PyShapedType &self) { - self.requireHasRank(); - return mlirShapedTypeGetRank(self); - }, - "Returns the rank of the given ranked shaped type."); - c.def_property_readonly( - "has_static_shape", - [](PyShapedType &self) -> bool { - return mlirShapedTypeHasStaticShape(self); - }, - "Returns whether the given shaped type has a static shape."); - c.def( - "is_dynamic_dim", - [](PyShapedType &self, intptr_t dim) -> bool { - self.requireHasRank(); - return mlirShapedTypeIsDynamicDim(self, dim); - }, - py::arg("dim"), - "Returns whether the dim-th dimension of the given shaped type is " - "dynamic."); - c.def( - "get_dim_size", - [](PyShapedType &self, intptr_t dim) { - self.requireHasRank(); - return mlirShapedTypeGetDimSize(self, dim); - }, - py::arg("dim"), - "Returns the dim-th dimension of the given ranked shaped type."); - c.def_static( - "is_dynamic_size", - [](int64_t size) -> bool { return mlirShapedTypeIsDynamicSize(size); }, - py::arg("dim_size"), - "Returns whether the given dimension size indicates a dynamic " - "dimension."); - c.def( - "is_dynamic_stride_or_offset", - [](PyShapedType &self, int64_t val) -> bool { - self.requireHasRank(); - return mlirShapedTypeIsDynamicStrideOrOffset(val); - }, - py::arg("dim_size"), - "Returns whether the given value is used as a placeholder for dynamic " - "strides and offsets in shaped types."); - c.def_property_readonly( - "shape", - [](PyShapedType &self) { - self.requireHasRank(); - - std::vector shape; - int64_t rank = mlirShapedTypeGetRank(self); - shape.reserve(rank); - for (int64_t i = 0; i < rank; ++i) - shape.push_back(mlirShapedTypeGetDimSize(self, i)); - return shape; - }, - "Returns the shape of the ranked shaped type as a list of integers."); - c.def_static( - "get_dynamic_size", []() { return mlirShapedTypeGetDynamicSize(); }, - "Returns the value used to indicate dynamic dimensions in shaped " - "types."); - c.def_static( - "get_dynamic_stride_or_offset", - []() { return mlirShapedTypeGetDynamicStrideOrOffset(); }, - "Returns the value used to indicate dynamic strides or offsets in " - "shaped types."); - } +// Shaped Type Interface - ShapedType +void mlir::PyShapedType::bindDerived(ClassTy &c) { + c.def_property_readonly( + 
"element_type", + [](PyShapedType &self) { return mlirShapedTypeGetElementType(self); }, + "Returns the element type of the shaped type."); + c.def_property_readonly( + "has_rank", + [](PyShapedType &self) -> bool { return mlirShapedTypeHasRank(self); }, + "Returns whether the given shaped type is ranked."); + c.def_property_readonly( + "rank", + [](PyShapedType &self) { + self.requireHasRank(); + return mlirShapedTypeGetRank(self); + }, + "Returns the rank of the given ranked shaped type."); + c.def_property_readonly( + "has_static_shape", + [](PyShapedType &self) -> bool { + return mlirShapedTypeHasStaticShape(self); + }, + "Returns whether the given shaped type has a static shape."); + c.def( + "is_dynamic_dim", + [](PyShapedType &self, intptr_t dim) -> bool { + self.requireHasRank(); + return mlirShapedTypeIsDynamicDim(self, dim); + }, + py::arg("dim"), + "Returns whether the dim-th dimension of the given shaped type is " + "dynamic."); + c.def( + "get_dim_size", + [](PyShapedType &self, intptr_t dim) { + self.requireHasRank(); + return mlirShapedTypeGetDimSize(self, dim); + }, + py::arg("dim"), + "Returns the dim-th dimension of the given ranked shaped type."); + c.def_static( + "is_dynamic_size", + [](int64_t size) -> bool { return mlirShapedTypeIsDynamicSize(size); }, + py::arg("dim_size"), + "Returns whether the given dimension size indicates a dynamic " + "dimension."); + c.def( + "is_dynamic_stride_or_offset", + [](PyShapedType &self, int64_t val) -> bool { + self.requireHasRank(); + return mlirShapedTypeIsDynamicStrideOrOffset(val); + }, + py::arg("dim_size"), + "Returns whether the given value is used as a placeholder for dynamic " + "strides and offsets in shaped types."); + c.def_property_readonly( + "shape", + [](PyShapedType &self) { + self.requireHasRank(); + + std::vector shape; + int64_t rank = mlirShapedTypeGetRank(self); + shape.reserve(rank); + for (int64_t i = 0; i < rank; ++i) + shape.push_back(mlirShapedTypeGetDimSize(self, i)); + return shape; + }, + "Returns the shape of the ranked shaped type as a list of integers."); + c.def_static( + "get_dynamic_size", []() { return mlirShapedTypeGetDynamicSize(); }, + "Returns the value used to indicate dynamic dimensions in shaped " + "types."); + c.def_static( + "get_dynamic_stride_or_offset", + []() { return mlirShapedTypeGetDynamicStrideOrOffset(); }, + "Returns the value used to indicate dynamic strides or offsets in " + "shaped types."); +} -private: - void requireHasRank() { - if (!mlirShapedTypeHasRank(*this)) { - throw py::value_error( - "calling this method requires that the type has a rank."); - } +void mlir::PyShapedType::requireHasRank() { + if (!mlirShapedTypeHasRank(*this)) { + throw py::value_error( + "calling this method requires that the type has a rank."); } -}; +} + +const mlir::PyShapedType::IsAFunctionTy mlir::PyShapedType::isaFunction = + mlirTypeIsAShaped; + +namespace { /// Vector Type subclass - VectorType. 
class PyVectorType : public PyConcreteType { diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 03e2f2be2156a..13341f0c4de88 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -293,14 +293,20 @@ MlirAttribute mlirLLVMDISubprogramAttrGet( MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit, MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, MlirAttribute file, unsigned int line, unsigned int scopeLine, - uint64_t subprogramFlags, MlirAttribute type) { + uint64_t subprogramFlags, MlirAttribute type, intptr_t nRetainedNodes, + MlirAttribute const *retainedNodes) { + SmallVector nodesStorage; + nodesStorage.reserve(nRetainedNodes); return wrap(DISubprogramAttr::get( unwrap(ctx), cast(unwrap(id)), cast(unwrap(compileUnit)), cast(unwrap(scope)), cast(unwrap(name)), cast(unwrap(linkageName)), cast(unwrap(file)), line, scopeLine, DISubprogramFlags(subprogramFlags), - cast(unwrap(type)))); + cast(unwrap(type)), + llvm::map_to_vector( + unwrapList(nRetainedNodes, retainedNodes, nodesStorage), + [](Attribute a) { return cast(a); }))); } MlirAttribute mlirLLVMDISubprogramAttrGetScope(MlirAttribute diSubprogram) { @@ -345,3 +351,16 @@ MlirAttribute mlirLLVMDIModuleAttrGet(MlirContext ctx, MlirAttribute file, MlirAttribute mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule) { return wrap(cast(unwrap(diModule)).getScope()); } + +MlirAttribute mlirLLVMDIImportedEntityAttrGet( + MlirContext ctx, unsigned int tag, MlirAttribute entity, MlirAttribute file, + unsigned int line, MlirAttribute name, intptr_t nElements, + MlirAttribute const *elements) { + SmallVector elementsStorage; + elementsStorage.reserve(nElements); + return wrap(DIImportedEntityAttr::get( + unwrap(ctx), tag, cast(unwrap(entity)), + cast(unwrap(file)), line, cast(unwrap(name)), + llvm::map_to_vector(unwrapList(nElements, elements, elementsStorage), + [](Attribute a) { return cast(a); }))); +} diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index b3798a3f7624b..d36583c8118ff 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -9,8 +9,11 @@ #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" +#include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/PatternMatch.h" @@ -24,6 +27,7 @@ namespace mlir { } // namespace mlir using namespace mlir; +using namespace mlir::amdgpu; namespace { struct ArithToAMDGPUConversionPass final @@ -43,12 +47,25 @@ struct ExtFOnFloat8RewritePattern final : OpRewritePattern { struct TruncFToFloat8RewritePattern final : OpRewritePattern { bool saturateFP8 = false; - TruncFToFloat8RewritePattern(MLIRContext *ctx, bool saturateFP8) - : OpRewritePattern::OpRewritePattern(ctx), saturateFP8(saturateFP8) {} + TruncFToFloat8RewritePattern(MLIRContext *ctx, bool saturateFP8, + Chipset chipset) + : OpRewritePattern::OpRewritePattern(ctx), saturateFP8(saturateFP8), + chipset(chipset) {} + Chipset chipset; LogicalResult match(arith::TruncFOp op) const override; void rewrite(arith::TruncFOp op, PatternRewriter &rewriter) const override; }; + +struct TruncfToFloat16RewritePattern 
final + : public OpRewritePattern { + + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(arith::TruncFOp op) const override; + void rewrite(arith::TruncFOp op, PatternRewriter &rewriter) const override; +}; + } // end namespace static Value castF32To(Type elementType, Value f32, Location loc, @@ -272,17 +289,105 @@ void TruncFToFloat8RewritePattern::rewrite(arith::TruncFOp op, rewriter.replaceOp(op, result); } +LogicalResult TruncfToFloat16RewritePattern::match(arith::TruncFOp op) const { + Type outType = op.getOut().getType(); + Type inputType = getElementTypeOrSelf(op.getIn()); + if (auto outVecType = dyn_cast(outType)) { + if (outVecType.isScalable()) + return failure(); + outType = outVecType.getElementType(); + } + return success(outType.isF16() && inputType.isF32()); +} + +void TruncfToFloat16RewritePattern::rewrite(arith::TruncFOp op, + PatternRewriter &rewriter) const { + Location loc = op.getLoc(); + Value in = op.getIn(); + Type outElemType = getElementTypeOrSelf(op.getOut().getType()); + VectorType truncResType = VectorType::get(2, outElemType); + auto inVectorTy = dyn_cast(in.getType()); + + // Handle the case where input type is not a vector type + if (!inVectorTy) { + auto sourceB = rewriter.create(loc, rewriter.getF32Type()); + Value asF16s = + rewriter.create(loc, truncResType, in, sourceB); + Value result = rewriter.create( + loc, asF16s, rewriter.createOrFold(loc, 0)); + return rewriter.replaceOp(op, result); + } + VectorType outType = cast(op.getOut().getType()); + int64_t numElements = outType.getNumElements(); + Value zero = rewriter.createOrFold( + loc, outElemType, rewriter.getFloatAttr(outElemType, 0.0)); + Value result = rewriter.createOrFold(loc, outType, zero); + + if (inVectorTy.getRank() > 1) { + inVectorTy = VectorType::get(SmallVector{numElements}, + inVectorTy.getElementType()); + in = rewriter.create(loc, inVectorTy, in); + } + + // Handle the vector case. We also handle the (uncommon) case where the vector + // length is odd + for (int64_t i = 0; i < numElements; i += 2) { + int64_t elemsThisOp = std::min(numElements, i + 2) - i; + Value thisResult = nullptr; + Value elemA = rewriter.create( + loc, in, rewriter.create(loc, i)); + Value elemB = rewriter.create(loc, rewriter.getF32Type()); + + if (elemsThisOp == 2) { + elemB = rewriter.create( + loc, in, rewriter.createOrFold(loc, i + 1)); + } + + thisResult = + rewriter.create(loc, truncResType, elemA, elemB); + // Place back the truncated result into the possibly larger vector. 
If we + // are operating on a size 2 vector, these operations should be folded away + thisResult = rewriter.create( + loc, thisResult, 0, elemsThisOp, 1); + result = rewriter.create(loc, thisResult, + result, i, 1); + } + + if (inVectorTy.getRank() != outType.getRank()) { + result = rewriter.create(loc, outType, result); + } + + rewriter.replaceOp(op, result); +} + void mlir::arith::populateArithToAMDGPUConversionPatterns( - RewritePatternSet &patterns, bool saturateFP8TruncF) { - patterns.add(patterns.getContext()); - patterns.add(patterns.getContext(), - saturateFP8TruncF); + RewritePatternSet &patterns, bool convertFP8Arithmetic, + bool saturateFP8Truncf, bool allowPackedF16Rtz, Chipset chipset) { + + if (convertFP8Arithmetic) { + patterns.add(patterns.getContext()); + patterns.add(patterns.getContext(), + saturateFP8Truncf, chipset); + } + if (allowPackedF16Rtz) + patterns.add(patterns.getContext()); } void ArithToAMDGPUConversionPass::runOnOperation() { Operation *op = getOperation(); + MLIRContext *ctx = &getContext(); RewritePatternSet patterns(op->getContext()); - arith::populateArithToAMDGPUConversionPatterns(patterns, saturateFP8Truncf); + FailureOr maybeChipset = amdgpu::Chipset::parse(chipset); + if (failed(maybeChipset)) { + emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset); + return signalPassFailure(); + } + + bool convertFP8Arithmetic = + (*maybeChipset).majorVersion == 9 && (*maybeChipset).minorVersion >= 0x40; + arith::populateArithToAMDGPUConversionPatterns( + patterns, convertFP8Arithmetic, saturateFP8Truncf, allowPackedF16Rtz, + *maybeChipset); if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) return signalPassFailure(); } diff --git a/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt b/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt index e2c951b0b34d8..50be09ab5a7c5 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt @@ -12,6 +12,7 @@ add_mlir_conversion_library(MLIRArithToAMDGPU LINK_LIBS PUBLIC MLIRAMDGPUDialect + MLIRAMDGPUUtils MLIRArithDialect MLIRArithUtils MLIRVectorDialect diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index 55143d5939ba2..842d239cf6a51 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -35,8 +35,8 @@ using namespace mlir; using namespace mlir::vector; namespace { -struct LowerVectorToLLVMPass - : public impl::ConvertVectorToLLVMPassBase { +struct ConvertVectorToLLVMPass + : public impl::ConvertVectorToLLVMPassBase { using Base::Base; @@ -58,7 +58,7 @@ struct LowerVectorToLLVMPass }; } // namespace -void LowerVectorToLLVMPass::runOnOperation() { +void ConvertVectorToLLVMPass::runOnOperation() { // Perform progressive lowering of operations on slices and // all contraction operations. Also applies folding and DCE. 
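  // (The pass implementation was renamed from LowerVectorToLLVMPass to
  // ConvertVectorToLLVMPass, presumably to match the generated
  // ConvertVectorToLLVMPassBase / registered pass name; the lowering logic
  // itself is unchanged.)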
{ diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index c1a785fb25478..3943696364950 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Diagnostics.h" diff --git a/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt index 0551d13b5a0cf..78d78cf48a747 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRAMDGPUDialect LINK_LIBS PUBLIC MLIRArithDialect + MLIRROCDLDialect # Needed for GPU address space enum definition MLIRGPUDialect MLIRIR diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 053ea7935260a..9fbe574ec392d 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -258,20 +258,23 @@ struct CallOpInterface return failure(); Value buffer = *maybeBuffer; - // Caller / callee type mismatch is handled with a CastOp. + // Caller / callee type mismatch is handled with castOrReallocMemRefValue. auto memRefType = funcType.getInput(opOperand.getOperandNumber()); // Since we don't yet have a clear layout story, to_memref may // conservatively turn tensors into more dynamic memref than necessary. // If the memref type of the callee fails, introduce an extra memref.cast // that will either canonicalize away or fail compilation until we can do - // something better. + // something better. Insert a reallocation + copy if it cannot be + // statically guaranteed that a direct cast would be valid. if (buffer.getType() != memRefType) { - assert( - memref::CastOp::areCastCompatible(buffer.getType(), memRefType) && - "CallOp::bufferize: cast incompatible"); - Value castBuffer = rewriter.create(callOp.getLoc(), - memRefType, buffer); - buffer = castBuffer; + auto memrefDstType = dyn_cast(memRefType); + assert(memrefDstType && + "buffer layout not supported on unranked tensors"); + FailureOr replacement = bufferization::castOrReallocMemRefValue( + rewriter, buffer, memrefDstType, options); + if (failed(replacement)) + return failure(); + buffer = *replacement; } newOperands.push_back(buffer); } diff --git a/mlir/lib/Dialect/DLTI/DLTI.cpp b/mlir/lib/Dialect/DLTI/DLTI.cpp index 7f8e11a1b7334..85ec9fc93248a 100644 --- a/mlir/lib/Dialect/DLTI/DLTI.cpp +++ b/mlir/lib/Dialect/DLTI/DLTI.cpp @@ -424,8 +424,16 @@ getClosestQueryable(Operation *op) { return std::pair(queryable, op); } -FailureOr dlti::query(Operation *op, ArrayRef keys, - bool emitError) { +FailureOr +dlti::query(Operation *op, ArrayRef keys, bool emitError) { + if (keys.empty()) { + if (emitError) { + auto diag = op->emitError() << "target op of failed DLTI query"; + diag.attachNote(op->getLoc()) << "no keys provided to attempt query with"; + } + return failure(); + } + auto [queryable, queryOp] = getClosestQueryable(op); Operation *reportOp = (queryOp ? 
queryOp : op); @@ -438,6 +446,15 @@ FailureOr dlti::query(Operation *op, ArrayRef keys, return failure(); } + auto keyToStr = [](DataLayoutEntryKey key) -> std::string { + std::string buf; + llvm::TypeSwitch(key) + .Case( // The only two kinds of key we know of. + [&](auto key) { llvm::raw_string_ostream(buf) << key; }) + .Default([](auto) { llvm_unreachable("unexpected entry key kind"); }); + return buf; + }; + Attribute currentAttr = queryable; for (auto &&[idx, key] : llvm::enumerate(keys)) { if (auto map = llvm::dyn_cast(currentAttr)) { @@ -446,17 +463,24 @@ FailureOr dlti::query(Operation *op, ArrayRef keys, if (emitError) { auto diag = op->emitError() << "target op of failed DLTI query"; diag.attachNote(reportOp->getLoc()) - << "key " << key << " has no DLTI-mapping per attr: " << map; + << "key " << keyToStr(key) + << " has no DLTI-mapping per attr: " << map; } return failure(); } currentAttr = *maybeAttr; } else { if (emitError) { + std::string commaSeparatedKeys; + llvm::interleave( + keys.take_front(idx), // All prior keys. + [&](auto key) { commaSeparatedKeys += keyToStr(key); }, + [&]() { commaSeparatedKeys += ","; }); + auto diag = op->emitError() << "target op of failed DLTI query"; diag.attachNote(reportOp->getLoc()) << "got non-DLTI-queryable attribute upon looking up keys [" - << keys.take_front(idx) << "] at op"; + << commaSeparatedKeys << "] at op"; } return failure(); } diff --git a/mlir/lib/Dialect/DLTI/TransformOps/DLTITransformOps.cpp b/mlir/lib/Dialect/DLTI/TransformOps/DLTITransformOps.cpp index 90aef82bddff0..02c41b4fe8113 100644 --- a/mlir/lib/Dialect/DLTI/TransformOps/DLTITransformOps.cpp +++ b/mlir/lib/Dialect/DLTI/TransformOps/DLTITransformOps.cpp @@ -33,7 +33,16 @@ void transform::QueryOp::getEffects( DiagnosedSilenceableFailure transform::QueryOp::applyToOne( transform::TransformRewriter &rewriter, Operation *target, transform::ApplyToEachResultList &results, TransformState &state) { - auto keys = SmallVector(getKeys().getAsRange()); + SmallVector keys; + for (Attribute key : getKeys()) { + if (auto strKey = dyn_cast(key)) + keys.push_back(strKey); + else if (auto typeKey = dyn_cast(key)) + keys.push_back(typeKey.getValue()); + else + return emitDefiniteFailure("'transform.dlti.query' keys of wrong type: " + "only StringAttr and TypeAttr are allowed"); + } FailureOr result = dlti::query(target, keys, /*emitError=*/true); diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index e6f1618cc2611..fdc21d6c6e24b 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -116,6 +116,11 @@ bool mlir::emitc::isIntegerIndexOrOpaqueType(Type type) { bool mlir::emitc::isSupportedFloatType(Type type) { if (auto floatType = llvm::dyn_cast(type)) { switch (floatType.getWidth()) { + case 16: { + if (llvm::isa(type)) + return true; + return false; + } case 32: case 64: return true; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index 963a4be25079e..98a9659735e7e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -58,10 +58,11 @@ void LLVMDialect::registerAttributes() { bool DINodeAttr::classof(Attribute attr) { return llvm::isa(attr); + DIImportedEntityAttr, DILabelAttr, DILexicalBlockAttr, + DILexicalBlockFileAttr, DILocalVariableAttr, DIModuleAttr, + DINamespaceAttr, DINullTypeAttr, DIStringTypeAttr, + DISubprogramAttr, DISubrangeAttr, DISubroutineTypeAttr>( + attr); } 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp index 395ff6ed1e48e..758700c9272bc 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp @@ -79,7 +79,8 @@ static void addScopeToFunction(LLVM::LLVMFuncOp llvmFunc, context, id, compileUnitAttr, fileAttr, funcNameAttr, funcNameAttr, fileAttr, /*line=*/line, - /*scopeline=*/col, subprogramFlags, subroutineTypeAttr); + /*scopeline=*/col, subprogramFlags, subroutineTypeAttr, + /*retainedNodes=*/{}); llvmFunc->setLoc(FusedLoc::get(context, {loc}, subprogramAttr)); } diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index 6ee1810c2ff2b..d5c21fb5d845e 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -762,13 +762,15 @@ enum class MatchConvolutionResult { NotProjectedPermutations, NonConvolutionLoop, OutputDimsNotParallel, - NonOutputDimNotReduction + NonOutputDimNotReduction, + EmptyConvolvedDims }; } // namespace mlir::linalg::detail mlir::linalg::detail::MatchConvolutionResult mlir::linalg::detail::isConvolutionInterfaceImpl( - Operation *op, ConvolutionDimensions *dimensions) { + Operation *op, ConvolutionDimensions *dimensions, + bool allowEmptyConvolvedDims) { auto linalgOp = dyn_cast(op); if (!linalgOp) return MatchConvolutionResult::NotLinalgOp; @@ -886,10 +888,12 @@ mlir::linalg::detail::isConvolutionInterfaceImpl( if (allLoopDims.size() != linalgOp.getNumLoops()) return MatchConvolutionResult::NonConvolutionLoop; + if (!allowEmptyConvolvedDims && inputExprWalker.convolvedDims.empty()) + return MatchConvolutionResult::EmptyConvolvedDims; + if (dimensions) { - FailureOr res = - inferConvolutionDimsImpl(linalgOp, inputExprWalker, - /*allowEmptyConvolvedDims=*/true); + FailureOr res = inferConvolutionDimsImpl( + linalgOp, inputExprWalker, allowEmptyConvolvedDims); assert(succeeded(res) && "unexpected failure to infer convolution dims"); *dimensions = *res; } @@ -914,14 +918,18 @@ mlir::linalg::detail::getMatchConvolutionMessage(MatchConvolutionResult res) { return "expected all iterators used to access outputs to be parallel"; case MatchConvolutionResult::NonOutputDimNotReduction: return "expected all iterators not used to access outputs to be reduction"; + case MatchConvolutionResult::EmptyConvolvedDims: + return "FIXME"; case MatchConvolutionResult::Success: return ""; } llvm_unreachable("unhandled MatchConvolutionResult case"); } -bool mlir::linalg::isaConvolutionOpInterface(LinalgOp linalgOp) { - return linalg::detail::isConvolutionInterfaceImpl(linalgOp.getOperation()) == +bool mlir::linalg::isaConvolutionOpInterface(LinalgOp linalgOp, + bool allowEmptyConvolvedDims) { + return linalg::detail::isConvolutionInterfaceImpl( + linalgOp.getOperation(), nullptr, allowEmptyConvolvedDims) == linalg::detail::MatchConvolutionResult::Success; } diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp index c5eb965884396..12e330ac7efbd 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -76,16 +76,19 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm, 
pm.addNestedPass(createConvertSCFToCFPass()); pm.addPass(memref::createExpandStridedMetadataPass()); pm.addPass(createLowerAffinePass()); - pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions())); + pm.addPass( + createConvertVectorToLLVMPass(options.convertVectorToLLVMOptions())); pm.addPass(createFinalizeMemRefToLLVMConversionPass()); pm.addNestedPass(createConvertComplexToStandardPass()); pm.addNestedPass(arith::createArithExpandOpsPass()); pm.addNestedPass(createConvertMathToLLVMPass()); pm.addPass(createConvertMathToLibmPass()); pm.addPass(createConvertComplexToLibmPass()); - pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions())); + pm.addPass( + createConvertVectorToLLVMPass(options.convertVectorToLLVMOptions())); pm.addPass(createConvertComplexToLLVMPass()); - pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions())); + pm.addPass( + createConvertVectorToLLVMPass(options.convertVectorToLLVMOptions())); pm.addPass(createConvertFuncToLLVMPass()); // Finalize GPU code generation. diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index c043582b7be9c..30657d8fccb15 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -1258,6 +1258,12 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { val.toString(strValue, 0, 0, false); os << strValue; switch (llvm::APFloatBase::SemanticsToEnum(val.getSemantics())) { + case llvm::APFloatBase::S_IEEEhalf: + os << "f16"; + break; + case llvm::APFloatBase::S_BFloat: + os << "bf16"; + break; case llvm::APFloatBase::S_IEEEsingle: os << "f"; break; @@ -1277,17 +1283,19 @@ LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) { // Print floating point attributes. 
if (auto fAttr = dyn_cast(attr)) { - if (!isa(fAttr.getType())) { - return emitError(loc, - "expected floating point attribute to be f32 or f64"); + if (!isa( + fAttr.getType())) { + return emitError( + loc, "expected floating point attribute to be f16, bf16, f32 or f64"); } printFloat(fAttr.getValue()); return success(); } if (auto dense = dyn_cast(attr)) { - if (!isa(dense.getElementType())) { - return emitError(loc, - "expected floating point attribute to be f32 or f64"); + if (!isa( + dense.getElementType())) { + return emitError( + loc, "expected floating point attribute to be f16, bf16, f32 or f64"); } os << '{'; interleaveComma(dense, os, [&](const APFloat &val) { printFloat(val); }); @@ -1640,6 +1648,14 @@ LogicalResult CppEmitter::emitType(Location loc, Type type) { } if (auto fType = dyn_cast(type)) { switch (fType.getWidth()) { + case 16: { + if (llvm::isa(type)) + return (os << "_Float16"), success(); + else if (llvm::isa(type)) + return (os << "__bf16"), success(); + else + return emitError(loc, "cannot emit float type ") << type; + } case 32: return (os << "float"), success(); case 64: diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp index 1817c1271b43e..ce3643f513d34 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp +++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp @@ -208,6 +208,20 @@ DINamespaceAttr DebugImporter::translateImpl(llvm::DINamespace *node) { node->getExportSymbols()); } +DIImportedEntityAttr +DebugImporter::translateImpl(llvm::DIImportedEntity *node) { + SmallVector elements; + for (llvm::DINode *element : node->getElements()) { + assert(element && "expected a non-null element type"); + elements.push_back(translate(element)); + } + + return DIImportedEntityAttr::get( + context, node->getTag(), translate(node->getEntity()), + translate(node->getFile()), node->getLine(), + getStringAttrOrNull(node->getRawName()), elements); +} + DISubprogramAttr DebugImporter::translateImpl(llvm::DISubprogram *node) { // Only definitions require a distinct identifier. 
mlir::DistinctAttr id; @@ -223,11 +237,17 @@ DISubprogramAttr DebugImporter::translateImpl(llvm::DISubprogram *node) { DISubroutineTypeAttr type = translate(node->getType()); if (node->getType() && !type) return nullptr; + + SmallVector retainedNodes; + for (llvm::DINode *retainedNode : node->getRetainedNodes()) + retainedNodes.push_back(translate(retainedNode)); + return DISubprogramAttr::get(context, id, translate(node->getUnit()), scope, getStringAttrOrNull(node->getRawName()), getStringAttrOrNull(node->getRawLinkageName()), translate(node->getFile()), node->getLine(), - node->getScopeLine(), *subprogramFlags, type); + node->getScopeLine(), *subprogramFlags, type, + retainedNodes); } DISubrangeAttr DebugImporter::translateImpl(llvm::DISubrange *node) { @@ -308,6 +328,8 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) { return translateImpl(casted); if (auto *casted = dyn_cast(node)) return translateImpl(casted); + if (auto *casted = dyn_cast(node)) + return translateImpl(casted); if (auto *casted = dyn_cast(node)) return translateImpl(casted); if (auto *casted = dyn_cast(node)) diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.h b/mlir/lib/Target/LLVMIR/DebugImporter.h index 0e040891ba6c0..cb796676759c3 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.h +++ b/mlir/lib/Target/LLVMIR/DebugImporter.h @@ -75,6 +75,7 @@ class DebugImporter { DIVariableAttr translateImpl(llvm::DIVariable *node); DIModuleAttr translateImpl(llvm::DIModule *node); DINamespaceAttr translateImpl(llvm::DINamespace *node); + DIImportedEntityAttr translateImpl(llvm::DIImportedEntity *node); DIScopeAttr translateImpl(llvm::DIScope *node); DISubprogramAttr translateImpl(llvm::DISubprogram *node); DISubrangeAttr translateImpl(llvm::DISubrange *node); diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 95b37e47d0461..042e015f107fe 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -306,6 +306,19 @@ llvm::DISubprogram *DebugTranslation::translateImpl(DISubprogramAttr attr) { static_cast(attr.getSubprogramFlags()), compileUnit); + // DIImportedEntity requires scope information which DIImportedEntityAttr does + // not have. This is why we translate DIImportedEntityAttr after we have + // created DISubprogram as we can use it as the scope. 
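The comment above captures the ordering constraint this hunk relies on: DIImportedEntityAttr deliberately carries no scope field, so the subprogram must be created first and is then passed back in as the scope when its retained nodes are translated. A minimal, self-contained C++ sketch of that two-phase shape follows; Subprogram, ImportedEntity, and translateImported are invented stand-ins for illustration, not the MLIR or LLVM debug-info API.

#include <memory>
#include <string>
#include <vector>

struct Subprogram; // parent is created first; children point back at it

struct ImportedEntity {
  const Subprogram *scope; // supplied by the caller, not stored in the source
  std::string entity;
};

struct Subprogram {
  std::string name;
  std::vector<ImportedEntity> retainedNodes; // attached after construction
};

// Phase two: build one imported entity, with the already-created parent
// passed in explicitly as its scope.
static ImportedEntity translateImported(const std::string &entity,
                                        const Subprogram &scope) {
  return ImportedEntity{&scope, entity};
}

// Phase one creates the parent; phase two fills in its retained nodes.
static std::unique_ptr<Subprogram>
translateSubprogram(const std::string &name,
                    const std::vector<std::string> &imports) {
  auto sp = std::make_unique<Subprogram>();
  sp->name = name;
  for (const std::string &e : imports)
    sp->retainedNodes.push_back(translateImported(e, *sp));
  return sp;
}

int main() {
  auto fn = translateSubprogram("imp_fn", {"mod1", "mod2"});
  return fn->retainedNodes.size() == 2 ? 0 : 1;
}

Keeping the scope out of the attribute and threading it through the translate call is what avoids the cyclic dependency mentioned in the DebugTranslation.h comment later in this patch.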
+ SmallVector retainedNodes; + for (DINodeAttr nodeAttr : attr.getRetainedNodes()) { + if (auto importedAttr = dyn_cast(nodeAttr)) { + llvm::DINode *dn = translate(importedAttr, node); + retainedNodes.push_back(dn); + } + } + if (!retainedNodes.empty()) + node->replaceRetainedNodes(llvm::MDTuple::get(llvmCtx, retainedNodes)); + if (attr.getId()) distinctAttrToNode.try_emplace(attr.getId(), node); return node; @@ -326,6 +339,18 @@ llvm::DINamespace *DebugTranslation::translateImpl(DINamespaceAttr attr) { attr.getExportSymbols()); } +llvm::DIImportedEntity *DebugTranslation::translate(DIImportedEntityAttr attr, + llvm::DIScope *scope) { + SmallVector elements; + for (DINodeAttr member : attr.getElements()) + elements.push_back(translate(member)); + + return llvm::DIImportedEntity::get( + llvmCtx, attr.getTag(), scope, translate(attr.getEntity()), + translate(attr.getFile()), attr.getLine(), + getMDStringOrNull(attr.getName()), llvm::MDNode::get(llvmCtx, elements)); +} + llvm::DISubrange *DebugTranslation::translateImpl(DISubrangeAttr attr) { auto getMetadataOrNull = [&](Attribute attr) -> llvm::Metadata * { if (!attr) diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.h b/mlir/lib/Target/LLVMIR/DebugTranslation.h index 16a853736226d..37b985acf8541 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.h +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.h @@ -90,6 +90,12 @@ class DebugTranslation { llvm::DISubroutineType *translateImpl(DISubroutineTypeAttr attr); llvm::DIType *translateImpl(DITypeAttr attr); + /// Currently, DIImportedEntityAttr does not have a scope field to avoid a + /// cyclic dependency. The scope information is obtained from the entity + /// which holds the list of DIImportedEntityAttr. This requires that scope + /// information be passed to translate function. + llvm::DIImportedEntity *translate(DIImportedEntityAttr attr, llvm::DIScope *); + /// Attributes that support self recursion need to implement an additional /// method to hook into `translateRecursive`. /// - ` translateTemporaryImpl()`: diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 83a14290bcf64..6a32aeb444140 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -592,7 +592,20 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, return bodyGenStatus; } +namespace { +/// Contains the arguments for an LLVM store operation +struct DeferredStore { + DeferredStore(llvm::Value *value, llvm::Value *address) + : value(value), address(address) {} + + llvm::Value *value; + llvm::Value *address; +}; +} // namespace + /// Allocate space for privatized reduction variables. 
+/// `deferredStores` contains information to create store operations which needs +/// to be inserted after all allocas template static LogicalResult allocReductionVars(T loop, ArrayRef reductionArgs, @@ -602,13 +615,13 @@ allocReductionVars(T loop, ArrayRef reductionArgs, SmallVectorImpl &reductionDecls, SmallVectorImpl &privateReductionVariables, DenseMap &reductionVariableMap, + SmallVectorImpl &deferredStores, llvm::ArrayRef isByRefs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); // delay creating stores until after all allocas - SmallVector> storesToCreate; - storesToCreate.reserve(loop.getNumReductionVars()); + deferredStores.reserve(loop.getNumReductionVars()); for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { Region &allocRegion = reductionDecls[i].getAllocRegion(); @@ -628,7 +641,7 @@ allocReductionVars(T loop, ArrayRef reductionArgs, // variable allocated in the inlined region) llvm::Value *var = builder.CreateAlloca( moduleTranslation.convertType(reductionDecls[i].getType())); - storesToCreate.emplace_back(phis[0], var); + deferredStores.emplace_back(phis[0], var); privateReductionVariables[i] = var; moduleTranslation.mapValue(reductionArgs[i], phis[0]); @@ -644,10 +657,6 @@ allocReductionVars(T loop, ArrayRef reductionArgs, } } - // TODO: further delay this so it doesn't come in the entry block at all - for (auto [data, addr] : storesToCreate) - builder.CreateStore(data, addr); - return success(); } @@ -819,12 +828,19 @@ static LogicalResult allocAndInitializeReductionVars( if (op.getNumReductionVars() == 0) return success(); + SmallVector deferredStores; + if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation, allocaIP, reductionDecls, privateReductionVariables, reductionVariableMap, - isByRef))) + deferredStores, isByRef))) return failure(); + // store result of the alloc region to the allocated pointer to the real + // reduction variable + for (auto [data, addr] : deferredStores) + builder.CreateStore(data, addr); + // Before the loop, store the initial values of reductions into reduction // variables. Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. 
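To make the motivation for DeferredStore above concrete, here is a small standalone C++ sketch of the same idea, assuming nothing beyond the standard library (the string-based "instructions" and the %red.priv/%init names are purely illustrative): allocas are emitted first while the value/address pairs are recorded, and the corresponding stores are only materialized once all allocas are in place, so the entry block keeps its allocas grouped at the top.

#include <iostream>
#include <string>
#include <vector>

// Mirrors the DeferredStore struct added in this patch: just the two
// operands a store needs, captured for later.
struct DeferredStore {
  std::string value;
  std::string address;
};

int main() {
  std::vector<std::string> entryBlock; // pretend IR, one string per instr
  std::vector<DeferredStore> deferredStores;

  // Phase one: emit only allocas, remembering which stores they will need.
  for (int i = 0; i < 2; ++i) {
    std::string addr = "%red.priv" + std::to_string(i);
    entryBlock.push_back(addr + " = alloca i32");
    deferredStores.push_back({"%init" + std::to_string(i), addr});
  }

  // Phase two: all allocas exist, now append the deferred stores.
  for (const DeferredStore &s : deferredStores)
    entryBlock.push_back("store " + s.value + ", ptr " + s.address);

  for (const std::string &instr : entryBlock)
    std::cout << instr << "\n";
  return 0;
}

The patch hands the recorded pairs back through the deferredStores argument so each caller can choose where to materialize them: right after the allocas in allocAndInitializeReductionVars, or at the start of the parallel region body in convertOmpParallel.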
@@ -1359,6 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, collectReductionDecls(opInst, reductionDecls); SmallVector privateReductionVariables( opInst.getNumReductionVars()); + SmallVector deferredStores; auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars @@ -1373,10 +1390,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, InsertPointTy(allocaIP.getBlock(), allocaIP.getBlock()->getTerminator()->getIterator()); - if (failed(allocReductionVars(opInst, reductionArgs, builder, - moduleTranslation, allocaIP, reductionDecls, - privateReductionVariables, - reductionVariableMap, isByRef))) + if (failed(allocReductionVars( + opInst, reductionArgs, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, reductionVariableMap, + deferredStores, isByRef))) bodyGenStatus = failure(); // Initialize reduction vars @@ -1401,6 +1418,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca()); + // insert stores deferred until after all allocas + // these store the results of the alloc region into the allocation for the + // pointer to the reduction variable + for (auto [data, addr] : deferredStores) + builder.CreateStore(data, addr); + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector phis; diff --git a/mlir/python/mlir/runtime/np_to_memref.py b/mlir/python/mlir/runtime/np_to_memref.py index 882b2751921bf..8cca1e7ad4a9e 100644 --- a/mlir/python/mlir/runtime/np_to_memref.py +++ b/mlir/python/mlir/runtime/np_to_memref.py @@ -37,6 +37,11 @@ class BF16(ctypes.Structure): _fields_ = [("bf16", ctypes.c_int16)] +class F8E5M2(ctypes.Structure): + """A ctype representation for MLIR's Float8E5M2.""" + + _fields_ = [("f8E5M2", ctypes.c_int8)] + # https://stackoverflow.com/questions/26921836/correct-way-to-test-for-numpy-dtype def as_ctype(dtp): @@ -49,6 +54,8 @@ def as_ctype(dtp): return F16 if ml_dtypes is not None and dtp == ml_dtypes.bfloat16: return BF16 + if ml_dtypes is not None and dtp == ml_dtypes.float8_e5m2: + return F8E5M2 return np.ctypeslib.as_ctypes_type(dtp) @@ -65,6 +72,11 @@ def to_numpy(array): ), f"bfloat16 requires the ml_dtypes package, please run:\n\npip install ml_dtypes\n" if array.dtype == BF16: return array.view("bfloat16") + assert not ( + array.dtype == F8E5M2 and ml_dtypes is None + ), f"float8_e5m2 requires the ml_dtypes package, please run:\n\npip install ml_dtypes\n" + if array.dtype == F8E5M2: + return array.view("float8_e5m2") return array diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c index d3054aa6a0d93..da28a96f89691 100644 --- a/mlir/test/CAPI/llvm.c +++ b/mlir/test/CAPI/llvm.c @@ -312,9 +312,15 @@ static void testDebugInfoAttributes(MlirContext ctx) { // CHECK: #llvm.di_subroutine_type<{{.*}}> mlirAttributeDump(subroutine_type); - MlirAttribute di_subprogram = - mlirLLVMDISubprogramAttrGet(ctx, id, compile_unit, compile_unit, foo, bar, - file, 1, 2, 0, subroutine_type); + MlirAttribute di_imported_entity = mlirLLVMDIImportedEntityAttrGet( + ctx, 0, di_module, file, 1, foo, 1, &local_var); + + mlirAttributeDump(di_imported_entity); + // CHECK: #llvm.di_imported_entity<{{.*}}> + + MlirAttribute di_subprogram = mlirLLVMDISubprogramAttrGet( + ctx, id, compile_unit, compile_unit, foo, bar, file, 1, 2, 0, + subroutine_type, 1, &di_imported_entity); // CHECK: #llvm.di_subprogram<{{.*}}> mlirAttributeDump(di_subprogram); diff --git 
a/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir new file mode 100644 index 0000000000000..121cae26748a8 --- /dev/null +++ b/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir @@ -0,0 +1,51 @@ +// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="allow-packed-f16-round-to-zero=true" | FileCheck %s + +// CHECK-LABEL: @scalar_trunc +// CHECK-SAME: (%[[value:.*]]: f32) +func.func @scalar_trunc(%v: f32) -> f16{ + // CHECK: %[[poison:.*]] = llvm.mlir.poison : f32 + // CHECK: %[[trunc:.*]] = rocdl.cvt.pkrtz %[[value]], %[[poison]] : vector<2xf16> + // CHECK: %[[extract:.*]] = vector.extractelement %[[trunc]][%c0 : index] : vector<2xf16> + // CHECK: return %[[extract]] : f16 + %w = arith.truncf %v : f32 to f16 + return %w : f16 +} + +// CHECK-LABEL: @vector_trunc +// CHECK-SAME: (%[[value:.*]]: vector<2xf32>) +func.func @vector_trunc_short(%v: vector<2xf32>) -> vector<2xf16> { + // CHECK: %[[elem0:.*]] = vector.extractelement %[[value]] + // CHECK: %[[elem1:.*]] = vector.extractelement %[[value]] + // CHECK: %[[ret:.*]] = rocdl.cvt.pkrtz %[[elem0]], %[[elem1]] : vector<2xf16> + // CHECK: return %[[ret]] + %w = arith.truncf %v : vector<2xf32> to vector<2xf16> + return %w : vector<2xf16> +} + +// CHECK-LABEL: @vector_trunc_long +// CHECK-SAME: (%[[value:.*]]: vector<9xf32>) +func.func @vector_trunc_long(%v: vector<9xf32>) -> vector<9xf16> { + // CHECK: %[[elem0:.*]] = vector.extractelement %[[value]][%c0 : index] + // CHECK: %[[elem1:.*]] = vector.extractelement %[[value]][%c1 : index] + // CHECK: %[[packed0:.*]] = rocdl.cvt.pkrtz %[[elem0]], %[[elem1]] : vector<2xf16> + // CHECK: %[[out0:.*]] = vector.insert_strided_slice %[[packed0]], {{.*}} {offsets = [0], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem2:.*]] = vector.extractelement %[[value]][%c2 : index] + // CHECK: %[[elem3:.*]] = vector.extractelement %[[value]][%c3 : index] + // CHECK: %[[packed1:.*]] = rocdl.cvt.pkrtz %[[elem2]], %[[elem3]] : vector<2xf16> + // CHECK: %[[out1:.*]] = vector.insert_strided_slice %[[packed1]], %[[out0]] {offsets = [2], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem4:.*]] = vector.extractelement %[[value]][%c4 : index] + // CHECK: %[[elem5:.*]] = vector.extractelement %[[value]][%c5 : index] + // CHECK: %[[packed2:.*]] = rocdl.cvt.pkrtz %[[elem4]], %[[elem5]] : vector<2xf16> + // CHECK: %[[out2:.*]] = vector.insert_strided_slice %[[packed2]], %[[out1]] {offsets = [4], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem6:.*]] = vector.extractelement %[[value]] + // CHECK: %[[elem7:.*]] = vector.extractelement %[[value]] + // CHECK: %[[packed3:.*]] = rocdl.cvt.pkrtz %[[elem6]], %[[elem7]] : vector<2xf16> + // CHECK: %[[out3:.*]] = vector.insert_strided_slice %[[packed3]], %[[out2]] {offsets = [6], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem8:.*]] = vector.extractelement %[[value]] + // CHECK: %[[packed4:.*]] = rocdl.cvt.pkrtz %[[elem8:.*]] : vector<2xf16> + // CHECK: %[[slice:.*]] = vector.extract_strided_slice %[[packed4]] {offsets = [0], sizes = [1], strides = [1]} : vector<2xf16> to vector<1xf16> + // CHECK: %[[out4:.*]] = vector.insert_strided_slice %[[slice]], %[[out3]] {offsets = [8], strides = [1]} : vector<1xf16> into vector<9xf16> + // CHECK: return %[[out4]] + %w = arith.truncf %v : vector<9xf32> to vector<9xf16> + return %w : vector<9xf16> +} diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir 
b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir index c7f39440a349b..cd921da2294e1 100644 --- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir +++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt --split-input-file %s \ -// RUN: --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{saturate-fp8-truncf=true}))' \ +// RUN: --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{chipset=gfx940 saturate-fp8-truncf=true}))' \ // RUN: | FileCheck %s // CHECK-LABEL: func.func @scalar_trunc diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir index 26a222a4a788e..bd90facb61544 100644 --- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir +++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu | FileCheck %s +// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="chipset=gfx940" | FileCheck %s // CHECK-LABEL: func.func @scalar_ext // CHECK-SAME: ([[V:%.+]]: f8E5M2FNUZ) diff --git a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc-unsupported.mlir b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc-unsupported.mlir index ef0e71ee8673b..b86690461dc26 100644 --- a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc-unsupported.mlir +++ b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc-unsupported.mlir @@ -15,36 +15,35 @@ func.func @arith_cast_vector(%arg0: vector<5xf32>) -> vector<5xi32> { } // ----- - -func.func @arith_cast_bf16(%arg0: bf16) -> i32 { +func.func @arith_cast_f80(%arg0: f80) -> i32 { // expected-error @+1 {{failed to legalize operation 'arith.fptosi'}} - %t = arith.fptosi %arg0 : bf16 to i32 + %t = arith.fptosi %arg0 : f80 to i32 return %t: i32 } // ----- -func.func @arith_cast_f16(%arg0: f16) -> i32 { +func.func @arith_cast_f128(%arg0: f128) -> i32 { // expected-error @+1 {{failed to legalize operation 'arith.fptosi'}} - %t = arith.fptosi %arg0 : f16 to i32 + %t = arith.fptosi %arg0 : f128 to i32 return %t: i32 } // ----- -func.func @arith_cast_to_bf16(%arg0: i32) -> bf16 { +func.func @arith_cast_to_f80(%arg0: i32) -> f80 { // expected-error @+1 {{failed to legalize operation 'arith.sitofp'}} - %t = arith.sitofp %arg0 : i32 to bf16 - return %t: bf16 + %t = arith.sitofp %arg0 : i32 to f80 + return %t: f80 } // ----- -func.func @arith_cast_to_f16(%arg0: i32) -> f16 { +func.func @arith_cast_to_f128(%arg0: i32) -> f128 { // expected-error @+1 {{failed to legalize operation 'arith.sitofp'}} - %t = arith.sitofp %arg0 : i32 to f16 - return %t: f16 + %t = arith.sitofp %arg0 : i32 to f128 + return %t: f128 } // ----- diff --git a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir index 836d8aedefc1f..dee9cc97a1449 100644 --- a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir +++ b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir @@ -46,9 +46,9 @@ memref.global "nested" constant @nested_global : memref<3x7xf32> // ----- -func.func @unsupported_type_f16() { +func.func @unsupported_type_f128() { // expected-error@+1 {{failed to legalize operation 'memref.alloca'}} - %0 = memref.alloca() : memref<4xf16> + %0 = memref.alloca() : memref<4xf128> return } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 
0248afb11f167..0d5224514e3a0 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -71,6 +71,30 @@ func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>) // ----- +// CHECK-NO-LAYOUT-MAP-LABEL: func.func @foo( +// CHECK-NO-LAYOUT-MAP-SAME: %[[VAL_0:.*]]: memref<3x8xf16>) -> memref<3x8xf16> { +// CHECK-NO-LAYOUT-MAP: return %[[VAL_0]] : memref<3x8xf16> +// CHECK-NO-LAYOUT-MAP: } +func.func @foo(%arg0: tensor<3x8xf16>) -> tensor<3x8xf16> { + return %arg0 : tensor<3x8xf16> +} + +// CHECK-NO-LAYOUT-MAP-LABEL: func.func @call_extract_slice( +// CHECK-NO-LAYOUT-MAP-SAME: %[[VAL_0:.*]]: memref<4x8xf16>) -> memref<3x8xf16> { +// CHECK-NO-LAYOUT-MAP: %[[VAL_1:.*]] = memref.subview %[[VAL_0]][1, 0] [3, 8] [1, 1] : memref<4x8xf16> to memref<3x8xf16, strided<[8, 1], offset: 8>> +// CHECK-NO-LAYOUT-MAP: %[[VAL_2:.*]] = memref.alloc() {alignment = 64 : i64} : memref<3x8xf16> +// CHECK-NO-LAYOUT-MAP: memref.copy %[[VAL_1]], %[[VAL_2]] : memref<3x8xf16, strided<[8, 1], offset: 8>> to memref<3x8xf16> +// CHECK-NO-LAYOUT-MAP: %[[VAL_3:.*]] = call @foo(%[[VAL_2]]) : (memref<3x8xf16>) -> memref<3x8xf16> +// CHECK-NO-LAYOUT-MAP: return %[[VAL_3]] : memref<3x8xf16> +// CHECK-NO-LAYOUT-MAP: } +func.func @call_extract_slice(%arg0: tensor<4x8xf16>) -> (tensor<3x8xf16>) { + %0 = tensor.extract_slice %arg0[1, 0] [3, 8] [1, 1] : tensor<4x8xf16> to tensor<3x8xf16> + %1 = call @foo(%0) : (tensor<3x8xf16>) -> tensor<3x8xf16> + return %1 : tensor<3x8xf16> +} + +// ----- + // CHECK-LABEL: func private @private_func // CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref) -> f32 func.func private @private_func(tensor) -> (f32) diff --git a/mlir/test/Dialect/DLTI/invalid.mlir b/mlir/test/Dialect/DLTI/invalid.mlir index 05f919fa25671..4b04f0195ef82 100644 --- a/mlir/test/Dialect/DLTI/invalid.mlir +++ b/mlir/test/Dialect/DLTI/invalid.mlir @@ -33,6 +33,14 @@ // ----- +// expected-error@below {{repeated layout entry key: 'i32'}} +"test.unknown_op"() { test.unknown_attr = #dlti.map< + #dlti.dl_entry, + #dlti.dl_entry +>} : () -> () + +// ----- + // expected-error@below {{repeated layout entry key: 'i32'}} "test.unknown_op"() { test.unknown_attr = #dlti.dl_spec< #dlti.dl_entry, diff --git a/mlir/test/Dialect/DLTI/query.mlir b/mlir/test/Dialect/DLTI/query.mlir index 10e91afd2ca7e..a793c1a6e8e6a 100644 --- a/mlir/test/Dialect/DLTI/query.mlir +++ b/mlir/test/Dialect/DLTI/query.mlir @@ -17,6 +17,60 @@ module attributes {transform.with_named_sequence} { // ----- +// expected-remark @below {{i32 present in set : unit}} +module attributes { test.dlti = #dlti.map<#dlti.dl_entry>} { + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %funcs = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + %module = transform.get_parent_op %funcs : (!transform.any_op) -> !transform.any_op + %param = transform.dlti.query [i32] at %module : (!transform.any_op) -> !transform.any_param + transform.debug.emit_param_as_remark %param, "i32 present in set :" at %module : !transform.any_param, !transform.any_op + transform.yield + } +} + +// ----- + +// expected-remark @below {{associated attr 32 : i32}} +module attributes { test.dlti = #dlti.map<#dlti.dl_entry>>>} { + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + 
transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %funcs = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + %module = transform.get_parent_op %funcs : (!transform.any_op) -> !transform.any_op + %param = transform.dlti.query [i32,"width_in_bits"] at %module : (!transform.any_op) -> !transform.any_param + transform.debug.emit_param_as_remark %param, "associated attr" at %module : !transform.any_param, !transform.any_op + transform.yield + } +} + +// ----- + +// expected-remark @below {{width in bits of i32 = 32 : i64}} +// expected-remark @below {{width in bits of f64 = 64 : i64}} +module attributes { test.dlti = #dlti.map<#dlti.dl_entry<"width_in_bits", #dlti.map<#dlti.dl_entry, #dlti.dl_entry>>>} { + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %funcs = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + %module = transform.get_parent_op %funcs : (!transform.any_op) -> !transform.any_op + %i32bits = transform.dlti.query ["width_in_bits",i32] at %module : (!transform.any_op) -> !transform.any_param + %f64bits = transform.dlti.query ["width_in_bits",f64] at %module : (!transform.any_op) -> !transform.any_param + transform.debug.emit_param_as_remark %i32bits, "width in bits of i32 =" at %module : !transform.any_param, !transform.any_op + transform.debug.emit_param_as_remark %f64bits, "width in bits of f64 =" at %module : !transform.any_param, !transform.any_op + transform.yield + } +} + +// ----- + // expected-remark @below {{associated attr 42 : i32}} module attributes { test.dlti = #dlti.dl_spec<#dlti.dl_entry<"test.id", 42 : i32>>} { func.func private @f() @@ -336,6 +390,23 @@ module attributes {transform.with_named_sequence} { // ----- +// expected-note @below {{got non-DLTI-queryable attribute upon looking up keys [i32]}} +module attributes { test.dlti = #dlti.dl_spec<#dlti.dl_entry>} { + // expected-error @below {{target op of failed DLTI query}} + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %func = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + // expected-error @below {{'transform.dlti.query' op failed to apply}} + %param = transform.dlti.query [i32,"width_in_bits"] at %func : (!transform.any_op) -> !transform.any_param + transform.yield + } +} + +// ----- + module { // expected-error @below {{target op of failed DLTI query}} // expected-note @below {{no DLTI-queryable attrs on target op or any of its ancestors}} @@ -353,6 +424,55 @@ module attributes {transform.with_named_sequence} { // ----- +// expected-note @below {{key i64 has no DLTI-mapping per attr: #dlti.map<#dlti.dl_entry>}} +module attributes { test.dlti = #dlti.map<#dlti.dl_entry<"width_in_bits", #dlti.map<#dlti.dl_entry>>>} { + // expected-error @below {{target op of failed DLTI query}} + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %func = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + // expected-error @below {{'transform.dlti.query' op failed to apply}} + %param = transform.dlti.query ["width_in_bits",i64] at %func : (!transform.any_op) -> !transform.any_param + 
transform.yield + } +} + +// ----- + +module attributes { test.dlti = #dlti.dl_spec<#dlti.dl_entry<"test.id", 42 : i32>>} { + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %funcs = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + // expected-error @below {{'transform.dlti.query' keys of wrong type: only StringAttr and TypeAttr are allowed}} + %param = transform.dlti.query [1] at %funcs : (!transform.any_op) -> !transform.param + transform.yield + } +} + +// ----- + +module attributes { test.dlti = #dlti.map<#dlti.dl_entry<"test.id", 42 : i32>>} { + // expected-error @below {{target op of failed DLTI query}} + // expected-note @below {{no keys provided to attempt query with}} + func.func private @f() +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg: !transform.any_op) { + %func = transform.structured.match ops{["func.func"]} in %arg : (!transform.any_op) -> !transform.any_op + // expected-error @below {{'transform.dlti.query' op failed to apply}} + %param = transform.dlti.query [] at %func : (!transform.any_op) -> !transform.any_param + transform.yield + } +} + +// ----- + module attributes { test.dlti = #dlti.dl_spec<#dlti.dl_entry<"test.id", 42 : i32>>} { func.func private @f() } diff --git a/mlir/test/Dialect/DLTI/valid.mlir b/mlir/test/Dialect/DLTI/valid.mlir index 4133eac5424ce..31c925e5cb5be 100644 --- a/mlir/test/Dialect/DLTI/valid.mlir +++ b/mlir/test/Dialect/DLTI/valid.mlir @@ -206,3 +206,18 @@ module attributes { "GPU": #dlti.target_device_spec< #dlti.dl_entry<"L1_cache_size_in_bytes", "128">> >} {} + + +// ----- + +// CHECK: "test.op_with_dlti_map"() ({ +// CHECK: }) {dlti.map = #dlti.map<#dlti.dl_entry<"dlti.unknown_id", 42 : i64>>} +"test.op_with_dlti_map"() ({ +}) { dlti.map = #dlti.map<#dlti.dl_entry<"dlti.unknown_id", 42>> } : () -> () + +// ----- + +// CHECK: "test.op_with_dlti_map"() ({ +// CHECK: }) {dlti.map = #dlti.map<#dlti.dl_entry>} +"test.op_with_dlti_map"() ({ +}) { dlti.map = #dlti.map<#dlti.dl_entry> } : () -> () diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 9c308cc010849..7ba55fc957a47 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -1019,10 +1019,10 @@ func.func @parallel_reduction_byref() { func.func @parallel_wsloop_reduction(%lb : index, %ub : index, %step : index) { %c1 = arith.constant 1 : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr - // CHECK: omp.parallel reduction(@add_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) { - omp.parallel reduction(@add_f32 %0 -> %prv : !llvm.ptr) { - // CHECK: omp.wsloop { - omp.wsloop { + // CHECK: omp.parallel { + omp.parallel { + // CHECK: omp.wsloop reduction(@add_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) { + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) { // CHECK: omp.loop_nest (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { %1 = arith.constant 2.0 : f32 @@ -1216,10 +1216,10 @@ func.func @parallel_reduction2() { func.func @parallel_wsloop_reduction2(%lb : index, %ub : index, %step : index) { %c1 = arith.constant 1 : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr - // CHECK: omp.parallel reduction(@add2_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) { - omp.parallel reduction(@add2_f32 %0 -> %prv : !llvm.ptr) { - // CHECK: omp.wsloop { - omp.wsloop { + // 
CHECK: omp.parallel { + omp.parallel { + // CHECK: omp.wsloop reduction(@add2_f32 %{{.*}} -> %{{.+}} : !llvm.ptr) { + omp.wsloop reduction(@add2_f32 %0 -> %prv : !llvm.ptr) { // CHECK: omp.loop_nest (%{{.+}}) : index = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { %1 = arith.constant 2.0 : f32 diff --git a/mlir/test/Target/Cpp/const.mlir b/mlir/test/Target/Cpp/const.mlir index 3658455d66943..d3656f830c48c 100644 --- a/mlir/test/Target/Cpp/const.mlir +++ b/mlir/test/Target/Cpp/const.mlir @@ -11,6 +11,8 @@ func.func @emitc_constant() { %c6 = "emitc.constant"(){value = 2 : index} : () -> index %c7 = "emitc.constant"(){value = 2.0 : f32} : () -> f32 %f64 = "emitc.constant"(){value = 4.0 : f64} : () -> f64 + %f16 = "emitc.constant"(){value = 2.0 : f16} : () -> f16 + %bf16 = "emitc.constant"(){value = 4.0 : bf16} : () -> bf16 %c8 = "emitc.constant"(){value = dense<0> : tensor} : () -> tensor %c9 = "emitc.constant"(){value = dense<[0, 1]> : tensor<2xindex>} : () -> tensor<2xindex> %c10 = "emitc.constant"(){value = dense<[[0.0, 1.0], [2.0, 3.0]]> : tensor<2x2xf32>} : () -> tensor<2x2xf32> @@ -26,6 +28,8 @@ func.func @emitc_constant() { // CPP-DEFAULT-NEXT: size_t [[V6:[^ ]*]] = 2; // CPP-DEFAULT-NEXT: float [[V7:[^ ]*]] = 2.000000000e+00f; // CPP-DEFAULT-NEXT: double [[F64:[^ ]*]] = 4.00000000000000000e+00; +// CPP-DEFAULT-NEXT: _Float16 [[F16:[^ ]*]] = 2.00000e+00f16; +// CPP-DEFAULT-NEXT: __bf16 [[BF16:[^ ]*]] = 4.0000e+00bf16; // CPP-DEFAULT-NEXT: Tensor [[V8:[^ ]*]] = {0}; // CPP-DEFAULT-NEXT: Tensor [[V9:[^ ]*]] = {0, 1}; // CPP-DEFAULT-NEXT: Tensor [[V10:[^ ]*]] = {0.0e+00f, 1.000000000e+00f, 2.000000000e+00f, 3.000000000e+00f}; @@ -40,6 +44,8 @@ func.func @emitc_constant() { // CPP-DECLTOP-NEXT: size_t [[V6:[^ ]*]]; // CPP-DECLTOP-NEXT: float [[V7:[^ ]*]]; // CPP-DECLTOP-NEXT: double [[F64:[^ ]*]]; +// CPP-DECLTOP-NEXT: _Float16 [[F16:[^ ]*]]; +// CPP-DECLTOP-NEXT: __bf16 [[BF16:[^ ]*]]; // CPP-DECLTOP-NEXT: Tensor [[V8:[^ ]*]]; // CPP-DECLTOP-NEXT: Tensor [[V9:[^ ]*]]; // CPP-DECLTOP-NEXT: Tensor [[V10:[^ ]*]]; @@ -52,6 +58,8 @@ func.func @emitc_constant() { // CPP-DECLTOP-NEXT: [[V6]] = 2; // CPP-DECLTOP-NEXT: [[V7]] = 2.000000000e+00f; // CPP-DECLTOP-NEXT: [[F64]] = 4.00000000000000000e+00; +// CPP-DECLTOP-NEXT: [[F16]] = 2.00000e+00f16; +// CPP-DECLTOP-NEXT: [[BF16]] = 4.0000e+00bf16; // CPP-DECLTOP-NEXT: [[V8]] = {0}; // CPP-DECLTOP-NEXT: [[V9]] = {0, 1}; // CPP-DECLTOP-NEXT: [[V10]] = {0.0e+00f, 1.000000000e+00f, 2.000000000e+00f, 3.000000000e+00f}; diff --git a/mlir/test/Target/Cpp/types.mlir b/mlir/test/Target/Cpp/types.mlir index deda383b3b0a7..e7f935c737438 100644 --- a/mlir/test/Target/Cpp/types.mlir +++ b/mlir/test/Target/Cpp/types.mlir @@ -22,6 +22,10 @@ func.func @ptr_types() { emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: f(); emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () + // CHECK-NEXT: f<_Float16*>(); + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () + // CHECK-NEXT: f<__bf16*>(); + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: f(); emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: f(); diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll index 03c3855a9a324..bb03da37c0d09 100644 --- a/mlir/test/Target/LLVMIR/Import/debug-info.ll +++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll @@ -792,3 +792,28 @@ define void @string_type(ptr 
%arg1) { ; CHECK-SAME: stringLengthExp = <[DW_OP_push_object_address, DW_OP_plus_uconst(8)]> ; CHECK-SAME: stringLocationExp = <[DW_OP_push_object_address, DW_OP_deref]>> ; CHECK: #di_local_variable1 = #llvm.di_local_variable + +; // ----- + +; Test that imported entities for a functions are handled correctly. + +define void @imp_fn() !dbg !12 { + ret void +} + +!llvm.module.flags = !{!10} +!llvm.dbg.cu = !{!4} + +!2 = !DIModule(scope: !4, name: "mod1", file: !3, line: 1) +!3 = !DIFile(filename: "test.f90", directory: "") +!4 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !3) +!8 = !DIModule(scope: !4, name: "mod1", file: !3, line: 5) +!10 = !{i32 2, !"Debug Info Version", i32 3} +!12 = distinct !DISubprogram(name: "imp_fn", linkageName: "imp_fn", scope: !3, file: !3, line: 10, type: !14, scopeLine: 10, spFlags: DISPFlagDefinition, unit: !4, retainedNodes: !16) +!14 = !DISubroutineType(cc: DW_CC_program, types: !15) +!15 = !{} +!16 = !{!17} +!17 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !12, entity: !8, file: !3, line: 1, elements: !15) + +; CHECK-DAG: #[[M:.+]] = #llvm.di_module<{{.*}}name = "mod1"{{.*}}> +; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<{{.*}}name = "imp_fn"{{.*}}retainedNodes = #llvm.di_imported_entity> diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index 1a9a8561de00d..30b2ba5e9bad1 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -366,6 +366,36 @@ llvm.func @fn_with_gl() { // ----- +// Test that imported entries correctly generates 'retainedNodes' in the +// subprogram. + +llvm.func @imp_fn() { + llvm.return +} loc(#loc2) +#file = #llvm.di_file<"test.f90" in ""> +#SP_TY = #llvm.di_subroutine_type +#CU = #llvm.di_compile_unit, + sourceLanguage = DW_LANG_Fortran95, file = #file, isOptimized = false, + emissionKind = Full> +#MOD = #llvm.di_module +#MOD1 = #llvm.di_module +#SP = #llvm.di_subprogram, compileUnit = #CU, scope = #file, + name = "imp_fn", file = #file, subprogramFlags = Definition, type = #SP_TY, + retainedNodes = #llvm.di_imported_entity, #llvm.di_imported_entity> +#loc1 = loc("test.f90":12:14) +#loc2 = loc(fused<#SP>[#loc1]) + +// CHECK-DAG: ![[SP:[0-9]+]] = {{.*}}!DISubprogram(name: "imp_fn"{{.*}}retainedNodes: ![[NODES:[0-9]+]]) +// CHECK-DAG: ![[NODES]] = !{![[NODE2:[0-9]+]], ![[NODE1:[0-9]+]]} +// CHECK-DAG: ![[NODE1]] = !DIImportedEntity(tag: DW_TAG_imported_module, scope: ![[SP]], entity: ![[MOD1:[0-9]+]]{{.*}}) +// CHECK-DAG: ![[NODE2]] = !DIImportedEntity(tag: DW_TAG_imported_module, scope: ![[SP]], entity: ![[MOD2:[0-9]+]]{{.*}}) +// CHECK-DAG: ![[MOD1]] = !DIModule({{.*}}name: "mod1"{{.*}}) +// CHECK-DAG: ![[MOD2]] = !DIModule({{.*}}name: "mod2"{{.*}}) + +// ----- + // Nameless and scopeless global constant. // CHECK-LABEL: @.str.1 = external constant [10 x i8] diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir index b06ad96f4592c..02ce6b5b19cea 100644 --- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir +++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir @@ -156,3 +156,49 @@ llvm.func @foo() // CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1 // CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1 // CHECK: call void @foo() + +// ----- + +// Verifies fix for https://github.com/llvm/llvm-project/issues/102939. 
+// +// The issues occurs because the CodeExtractor component only collect inputs +// (to the parallel regions) that are defined in the same function in which the +// parallel regions is present. Howerver, this is problematic because if we are +// privatizing a global value (e.g. a `target` variable which is emitted as a +// global), then we miss finding that input and we do not privatize the +// variable. + +omp.private {type = firstprivate} @global_privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x f32 {bindc_name = "global", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} copy { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> f32 + llvm.store %0, %arg1 : f32, !llvm.ptr + omp.yield(%arg1 : !llvm.ptr) +} + +llvm.func @global_accessor() { + %global_addr = llvm.mlir.addressof @global : !llvm.ptr + omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(3.140000e+00 : f32) : f32 + llvm.store %1, %arg0 : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + +llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 { + %0 = llvm.mlir.zero : f32 + llvm.return %0 : f32 +} + +// CHECK-LABEL: @global_accessor..omp_par({{.*}}) +// CHECK-NEXT: omp.par.entry: +// Verify that we found the privatizer by checking that we properly inlined the +// bodies of the alloc and copy regions. +// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i64 1, align 4 +// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4 +// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4 diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir index da6f943004612..2d8a13ccd2a1f 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -89,7 +89,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4 // CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 -// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 // CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8 // CHECK: br label %[[VAL_15:.*]] // CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]] @@ -98,6 +97,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: br label %[[VAL_18:.*]] // CHECK: omp.par.region1: ; preds = %[[VAL_17]] // CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 // CHECK: br label %[[VAL_22:.*]] // CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]] // CHECK: store i32 0, ptr %[[VAL_7]], align 4 diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir index bfdad8c19335e..1d4b4915bcc39 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir @@ -540,8 +540,8 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) { %lb = llvm.mlir.constant(1 : i64) : i64 %step = llvm.mlir.constant(1 : i64) : i64 - omp.parallel reduction(@add_i32 %0 -> %prv : !llvm.ptr) { - omp.wsloop { + omp.parallel { + omp.wsloop reduction(@add_i32 %0 -> %prv : !llvm.ptr) { omp.loop_nest (%iv) : i64 = 
(%lb) to (%ub) step (%step) { %ival = llvm.trunc %iv : i64 to i32 %lprv = llvm.load %prv : !llvm.ptr -> i32 diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 64bcb5bdb255d..d902a82eeb9ea 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -530,6 +530,12 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 { llvm.return %source5 : i32 } +llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> { + // CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}}) + %source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16> + llvm.return %source : vector<2xf16> +} + // CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" } // CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128" diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index 8125bf3fb8fc9..1cdda63eefe30 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -5,7 +5,7 @@ from mlir.passmanager import * from mlir.execution_engine import * from mlir.runtime import * -from ml_dtypes import bfloat16 +from ml_dtypes import bfloat16, float8_e5m2 # Log everything to stderr and flush so that we have a unified stream to match @@ -561,6 +561,45 @@ def testBF16Memref(): run(testBF16Memref) +# Test f8E5M2 memrefs +# CHECK-LABEL: TEST: testF8E5M2Memref +def testF8E5M2Memref(): + with Context(): + module = Module.parse( + """ + module { + func.func @main(%arg0: memref<1xf8E5M2>, + %arg1: memref<1xf8E5M2>) attributes { llvm.emit_c_interface } { + %0 = arith.constant 0 : index + %1 = memref.load %arg0[%0] : memref<1xf8E5M2> + memref.store %1, %arg1[%0] : memref<1xf8E5M2> + return + } + } """ + ) + + arg1 = np.array([0.5]).astype(float8_e5m2) + arg2 = np.array([0.0]).astype(float8_e5m2) + + arg1_memref_ptr = ctypes.pointer( + ctypes.pointer(get_ranked_memref_descriptor(arg1)) + ) + arg2_memref_ptr = ctypes.pointer( + ctypes.pointer(get_ranked_memref_descriptor(arg2)) + ) + + execution_engine = ExecutionEngine(lowerToLLVM(module)) + execution_engine.invoke("main", arg1_memref_ptr, arg2_memref_ptr) + + # test to-numpy utility + # CHECK: [0.5] + npout = ranked_memref_to_numpy(arg2_memref_ptr[0]) + log(npout) + + +run(testF8E5M2Memref) + + # Test addition of two 2d_memref # CHECK-LABEL: TEST: testDynamicMemrefAdd2D def testDynamicMemrefAdd2D(): diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ddb08f12f0497..866bd5ed6fd3e 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1534,6 +1534,7 @@ cc_library( ":BytecodeOpInterface", ":GPUDialect", ":IR", + ":ROCDLDialect", ":SideEffectInterfaces", "//llvm:Support", ], @@ -8581,11 +8582,14 @@ cc_library( includes = ["include"], deps = [ ":AMDGPUDialect", + ":AMDGPUUtils", ":ArithDialect", ":ArithUtils", ":ConversionPassIncGen", ":IR", + ":LLVMDialect", ":Pass", + ":ROCDLDialect", ":Support", ":TransformUtils", ":VectorDialect",