diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 8887d1f5d5bd4..49e226513028f 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -802,6 +802,11 @@
 
   The maximum size of a function to consider for inference.
 
+- `--stale-matching-min-matched-block=<uint>`
+
+  Minimum percentage of exactly matched blocks for a function to be
+  considered for profile inference.
+
 - `--stale-threshold=<uint>`
 
   Maximum percentage of stale functions to tolerate (default: 100)
@@ -1161,4 +1166,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
\ No newline at end of file
+  Print non-default options after command line parsing
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 365bc5389266d..c9bcd04281e72 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -51,6 +51,12 @@ cl::opt<bool>
       cl::desc("Infer counts from stale profile data."), cl::init(false),
       cl::Hidden, cl::cat(BoltOptCategory));
 
+cl::opt<unsigned> StaleMatchingMinMatchedBlock(
+    "stale-matching-min-matched-block",
+    cl::desc("Percentage threshold of matched basic blocks at which stale "
+             "profile inference is executed."),
+    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
+
 cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",
     cl::desc("The maximum size of a function to consider for inference."),
@@ -391,10 +397,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(BinaryContext &BC,
-                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
-                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
+size_t matchWeightsByHashes(
+    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
+    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
   std::vector<FlowBlock *> Blocks;
@@ -500,6 +505,8 @@ void matchWeightsByHashes(BinaryContext &BC,
     Block.HasUnknownWeight = false;
     Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
   }
+
+  return MatchedBlocks.size();
 }
 
 /// The function finds all blocks that are (i) reachable from the Entry block
@@ -575,10 +582,16 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 /// Decide if stale profile matching can be applied for a given function.
 /// Currently we skip inference for (very) large instances and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
-bool canApplyInference(const FlowFunction &Func) {
+bool canApplyInference(const FlowFunction &Func,
+                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
+                       const uint64_t &MatchedBlocks) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
+  if (MatchedBlocks * 100 <
+      opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
+    return false;
+
   bool HasExitBlocks = llvm::any_of(
       Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
   if (!HasExitBlocks)
@@ -725,18 +738,21 @@ bool YAMLProfileReader::inferStaleProfile(
   const BinaryFunction::BasicBlockOrderType BlockOrder(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
+  // Tracks the number of matched blocks.
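A note on the new gate in `canApplyInference` above: comparing `MatchedBlocks * 100` against `StaleMatchingMinMatchedBlock * YamlBF.Blocks.size()` is the integer form of "the matched fraction falls below the threshold percentage", so no floating-point division is needed and an empty profile cannot divide by zero. A minimal standalone sketch of the same predicate (the function and parameter names are illustrative, not part of the patch):

```cpp
#include <cstdint>

// True when at least ThresholdPct percent of the profiled blocks were
// hash-matched. Rearranged from Matched / Total >= ThresholdPct / 100.0
// to stay in integer arithmetic; with ThresholdPct = 0 it always passes,
// matching the option's cl::init(0) default.
bool passesMatchThreshold(uint64_t Matched, uint64_t Total,
                          uint64_t ThresholdPct) {
  return Matched * 100 >= ThresholdPct * Total;
}
```

For example, with `--stale-matching-min-matched-block=75` and 6 profiled blocks, 4 matched blocks fail (400 < 450) while 5 matched blocks pass (500 >= 450).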
+ // Create a wrapper flow function to use with the profile inference algorithm. FlowFunction Func = createFlowFunction(BlockOrder); // Match as many block/jump counts from the stale profile as possible - matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func); + size_t MatchedBlocks = + matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func); // Adjust the flow function by marking unreachable blocks Unlikely so that // they don't get any counts assigned. preprocessUnreachableBlocks(Func); // Check if profile inference can be applied for the instance. - if (!canApplyInference(Func)) + if (!canApplyInference(Func, YamlBF, MatchedBlocks)) return false; // Apply the profile inference algorithm. diff --git a/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml new file mode 100644 index 0000000000000..785e23922ce49 --- /dev/null +++ b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml @@ -0,0 +1,57 @@ +--- +header: + profile-version: 1 + binary-name: 'reader-yaml.test.tmp.exe' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: SolveCubic + fid: 6 + hash: 0x0000000000000000 + exec: 151 + nblocks: 18 + blocks: + - bid: 0 + insns: 43 + hash: 0x4600940a609c0000 + exec: 151 + succ: [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ] + - bid: 1 + insns: 7 + hash: 0x167a1f084f130088 + succ: [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ] + - bid: 13 + insns: 26 + hash: 0xa8d50000f81902a7 + succ: [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ] + - bid: 3 + insns: 9 + hash: 0xc516000073dc00a0 + succ: [ { bid: 5, cnt: 151 } ] + - bid: 5 + insns: 9 + hash: 0x6446e1ea500111 + - name: usqrt + fid: 7 + hash: 0x0000000000000000 + exec: 20 + nblocks: 6 + blocks: + - bid: 0 + insns: 4 + hash: 0x0000000000000001 + exec: 20 + succ: [ { bid: 1, cnt: 0 } ] + - bid: 1 + insns: 9 + hash: 0x0000000000000001 + succ: [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ] + - bid: 3 + insns: 2 + hash: 0x0000000000000001 + succ: [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ] +... diff --git a/bolt/test/X86/stale-matching-min-matched-block.test b/bolt/test/X86/stale-matching-min-matched-block.test new file mode 100644 index 0000000000000..383d4d7dcb9d2 --- /dev/null +++ b/bolt/test/X86/stale-matching-min-matched-block.test @@ -0,0 +1,10 @@ +## This script checks the stale-matching-min-matched-block flag. 
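In the YAML input above, `SolveCubic` carries realistic-looking block hashes while `usqrt` has only `0x0000000000000001` placeholders that presumably cannot match any real block hash, so only one of the two profiled functions can clear a 75% matched-block threshold; that is what this test exercises. For orientation, a heavily simplified sketch of the quantity `matchWeightsByHashes` now returns (the real implementation matches `BlendedBlockHash`es with exact and loose modes; all types and names below are illustrative):

```cpp
#include <cstddef>
#include <cstdint>
#include <unordered_set>
#include <vector>

struct ProfileBlock { uint64_t Hash; };
struct BinaryBlock { uint64_t Hash; };

// Count profile blocks whose hash also occurs among the binary's blocks;
// this is the count that feeds the canApplyInference() threshold.
size_t countMatchedBlocks(const std::vector<ProfileBlock> &Profile,
                          const std::vector<BinaryBlock> &Binary) {
  std::unordered_set<uint64_t> BinaryHashes;
  for (const BinaryBlock &B : Binary)
    BinaryHashes.insert(B.Hash);
  size_t Matched = 0;
  for (const ProfileBlock &P : Profile)
    if (BinaryHashes.count(P.Hash))
      ++Matched;
  return Matched;
}
```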
+ +RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe + +## Testing "usqrt" +RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \ +RUN: --infer-stale-profile=1 --stale-matching-min-matched-block=75 \ +RUN: --profile-ignore-hash=1 --debug-only=bolt-prof 2>&1 | FileCheck %s + +CHECK: BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 53ece996769a8..4d1f440506e09 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1147,6 +1147,8 @@ class ASTContext : public RefCountedBase { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) CanQualType SingletonId; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) CanQualType SingletonId; +#include "clang/Basic/AMDGPUTypes.def" // Types for deductions in C++0x [stmt.ranged]'s desugaring. Built on demand. mutable QualType AutoDeductTy; // Deduction against 'auto'. diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index fab233b62d8d1..61246479188e9 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3015,6 +3015,9 @@ class BuiltinType : public Type { // WebAssembly reference types #define WASM_TYPE(Name, Id, SingletonId) Id, #include "clang/Basic/WebAssemblyReferenceTypes.def" +// AMDGPU types +#define AMDGPU_TYPE(Name, Id, SingletonId) Id, +#include "clang/Basic/AMDGPUTypes.def" // All other builtin types #define BUILTIN_TYPE(Id, SingletonId) Id, #define LAST_BUILTIN_TYPE(Id) LastKind = Id diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 40dd16f080e2e..aba14b222a03a 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -861,6 +861,10 @@ let Class = BuiltinType in { case BuiltinType::ID: return ctx.SINGLETON_ID; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(NAME, ID, SINGLETON_ID) \ + case BuiltinType::ID: return ctx.SINGLETON_ID; +#include "clang/Basic/AMDGPUTypes.def" + #define BUILTIN_TYPE(ID, SINGLETON_ID) \ case BuiltinType::ID: return ctx.SINGLETON_ID; #include "clang/AST/BuiltinTypes.def" diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def new file mode 100644 index 0000000000000..e0d7be470a325 --- /dev/null +++ b/clang/include/clang/Basic/AMDGPUTypes.def @@ -0,0 +1,21 @@ +//===-- AMDGPUTypes.def - Metadata about AMDGPU types -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines various AMDGPU builtin types. 
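The `.def` file whose header begins here is an X-macro table: every include site defines `AMDGPU_TYPE` (or the more specific `AMDGPU_OPAQUE_PTR_TYPE`, which defaults to forwarding to `AMDGPU_TYPE`) to expand each entry into whatever code it needs, and the file undefines the macros on the way out. A reduced sketch of the consuming side, mirroring the `Type.h` hunk above (the enum name here is illustrative):

```cpp
// Each AMDGPU_TYPE expansion contributes one enumerator, so adding a row to
// AMDGPUTypes.def automatically extends every switch and table built on it.
enum class AMDGPUPredefTypeKind {
#define AMDGPU_TYPE(Name, Id, SingletonId) Id,
#include "clang/Basic/AMDGPUTypes.def"
};
// After this patch the only enumerator is AMDGPUBufferRsrc, generated from
// the AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", ...) entry below.
```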
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_OPAQUE_PTR_TYPE +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, SingletonId) \ + AMDGPU_TYPE(Name, Id, SingletonId) +#endif + +AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", "__amdgpu_buffer_rsrc_t", 8, 128, 128, AMDGPUBufferRsrc, AMDGPUBufferRsrcTy) + +#undef AMDGPU_TYPE +#undef AMDGPU_OPAQUE_PTR_TYPE diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index a4728b1c06b3f..24e616f76b9af 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1097,6 +1097,9 @@ enum PredefinedTypeIDs { // \brief WebAssembly reference types with auto numeration #define WASM_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID, #include "clang/Basic/WebAssemblyReferenceTypes.def" +// \brief AMDGPU types with auto numeration +#define AMDGPU_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID, +#include "clang/Basic/AMDGPUTypes.def" /// The placeholder type for unresolved templates. PREDEF_TYPE_UNRESOLVED_TEMPLATE, @@ -1109,7 +1112,7 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 503; +const unsigned NUM_PREDEF_TYPE_IDS = 504; // Ensure we do not overrun the predefined types we reserved // in the enum PredefinedTypeIDs above. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 34aa399fda2f8..d389ef12468ee 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1384,6 +1384,13 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, #include "clang/Basic/WebAssemblyReferenceTypes.def" } + if (Target.getTriple().isAMDGPU() || + (AuxTarget && AuxTarget->getTriple().isAMDGPU())) { +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + InitBuiltinType(SingletonId, BuiltinType::Id); +#include "clang/Basic/AMDGPUTypes.def" + } + // Builtin type for __objc_yes and __objc_no ObjCBuiltinBoolTy = (Target.useSignedCharForObjCBool() ? 
SignedCharTy : BoolTy); @@ -2200,6 +2207,13 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = 8; \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_OPAQUE_PTR_TYPE(NAME, MANGLEDNAME, AS, WIDTH, ALIGN, ID, \ + SINGLETONID) \ + case BuiltinType::ID: \ + Width = WIDTH; \ + Align = ALIGN; \ + break; +#include "clang/Basic/AMDGPUTypes.def" } break; case Type::ObjCObjectPointer: @@ -8168,6 +8182,8 @@ static char getObjCEncodingForPrimitiveType(const ASTContext *C, #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" { DiagnosticsEngine &Diags = C->getDiagnostics(); unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 02cd4ed9a6cac..1b67feaae8874 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1099,6 +1099,10 @@ ExpectedType ASTNodeImporter::VisitBuiltinType(const BuiltinType *T) { case BuiltinType::Id: \ return Importer.getToContext().SingletonId; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + return Importer.getToContext().SingletonId; +#include "clang/Basic/AMDGPUTypes.def" #define SHARED_SINGLETON_TYPE(Expansion) #define BUILTIN_TYPE(Id, SingletonId) \ case BuiltinType::Id: return Importer.getToContext().SingletonId; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3a6c8b4f82cca..dd355a7125c5a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11814,6 +11814,8 @@ GCCTypeClass EvaluateBuiltinClassifyType(QualType T, #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" return GCCTypeClass::None; case BuiltinType::Dependent: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index ed9e6eeb36c75..203db72c43733 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3423,6 +3423,12 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { Out << 'u' << type_name.size() << type_name; \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + type_name = Name; \ + Out << 'u' << type_name.size() << type_name; \ + break; +#include "clang/Basic/AMDGPUTypes.def" } } diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index a863ec7a529b9..d87be5f2043a9 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2612,6 +2612,8 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index 2d16237f5325a..48d1763125e6c 100644 --- a/clang/lib/AST/NSAPI.cpp +++ 
b/clang/lib/AST/NSAPI.cpp @@ -453,6 +453,8 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BoundMember: case BuiltinType::UnresolvedTemplate: case BuiltinType::Dependent: diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index dd3b38fabb550..3031d76abbd75 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -865,6 +865,8 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" #define SIGNED_TYPE(Id, SingletonId) #define UNSIGNED_TYPE(Id, SingletonId) #define FLOATING_TYPE(Id, SingletonId) diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 33acae2cbafac..656b733a13b0e 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3509,6 +3509,10 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case Id: \ return Name; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case Id: \ + return Name; +#include "clang/Basic/AMDGPUTypes.def" } llvm_unreachable("Invalid builtin type."); @@ -4778,6 +4782,8 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BuiltinFn: case BuiltinType::NullPtr: case BuiltinType::IncompleteMatrixIdx: diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 9dd90d9bf4e54..33e6ccbadc12d 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -428,6 +428,8 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: case BuiltinType::ArraySection: diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 572ba84d22ef5..a072475ba7705 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -865,7 +865,16 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return SingletonId; \ } #include "clang/Basic/WebAssemblyReferenceTypes.def" - +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, \ + SingletonId) \ + case BuiltinType::Id: { \ + if (!SingletonId) \ + SingletonId = \ + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, \ + MangledName, TheCU, TheCU->getFile(), 0); \ + return SingletonId; \ + } +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::UChar: case BuiltinType::Char_U: Encoding = llvm::dwarf::DW_ATE_unsigned_char; diff --git a/clang/lib/CodeGen/CGDebugInfo.h 
b/clang/lib/CodeGen/CGDebugInfo.h index 272c8d6e75965..2731c627d9dc3 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -83,6 +83,8 @@ class CGDebugInfo { #include "clang/Basic/OpenCLExtensionTypes.def" #define WASM_TYPE(Name, Id, SingletonId) llvm::DIType *SingletonId = nullptr; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) llvm::DIType *SingletonId = nullptr; +#include "clang/Basic/AMDGPUTypes.def" /// Cache of previously constructed Types. llvm::DenseMap TypeCache; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 0a926e4ac27fe..d823c336e39bf 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -533,6 +533,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { llvm_unreachable("Unexpected wasm reference builtin type!"); \ } break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, \ + SingletonId) \ + case BuiltinType::Id: \ + return llvm::PointerType::get(getLLVMContext(), AS); +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 5a3e83de625c9..01a735c1437e1 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3362,6 +3362,8 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index 31c4a3345c09d..5036ddee35fd1 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -772,6 +772,11 @@ void USRGenerator::VisitType(QualType T) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + Out << "@BT@" << #Name; \ + break; +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::ShortAccum: Out << "@BT@ShortAccum"; break; case BuiltinType::Accum: diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 907a05a5d1b49..069978c1b4023 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -497,6 +497,14 @@ void Sema::Initialize() { #include "clang/Basic/WebAssemblyReferenceTypes.def" } + if (Context.getTargetInfo().getTriple().isAMDGPU() || + (Context.getAuxTargetInfo() && + Context.getAuxTargetInfo()->getTriple().isAMDGPU())) { +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + addImplicitTypedef(Name, Context.SingletonId); +#include "clang/Basic/AMDGPUTypes.def" + } + if (Context.getTargetInfo().hasBuiltinMSVaList()) { DeclarationName MSVaList = &Context.Idents.get("__builtin_ms_va_list"); if (IdResolver.begin(MSVaList) == IdResolver.end()) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 4db8b4130c3c7..a31cefc540cbf 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6169,6 +6169,8 @@ static bool 
isPlaceholderToRemoveAsArg(QualType type) {
 #include "clang/Basic/RISCVVTypes.def"
 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
 #include "clang/Basic/WebAssemblyReferenceTypes.def"
+#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/AMDGPUTypes.def"
 #define PLACEHOLDER_TYPE(ID, SINGLETON_ID)
 #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID:
 #include "clang/AST/BuiltinTypes.def"
@@ -21004,6 +21006,8 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
 #include "clang/Basic/RISCVVTypes.def"
 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
 #include "clang/Basic/WebAssemblyReferenceTypes.def"
+#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/AMDGPUTypes.def"
 #define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id:
 #define PLACEHOLDER_TYPE(Id, SingletonId)
 #include "clang/AST/BuiltinTypes.def"
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp
index bc662a87a7bf3..3385cb8aad7e4 100644
--- a/clang/lib/Serialization/ASTCommon.cpp
+++ b/clang/lib/Serialization/ASTCommon.cpp
@@ -258,6 +258,11 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) {
     ID = PREDEF_TYPE_##Id##_ID; \
     break;
 #include "clang/Basic/WebAssemblyReferenceTypes.def"
+#define AMDGPU_TYPE(Name, Id, SingletonId) \
+  case BuiltinType::Id: \
+    ID = PREDEF_TYPE_##Id##_ID; \
+    break;
+#include "clang/Basic/AMDGPUTypes.def"
   case BuiltinType::BuiltinFn:
     ID = PREDEF_TYPE_BUILTIN_FN;
     break;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index a2c322087fd1e..0810d720bb4e0 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7401,6 +7401,11 @@ QualType ASTReader::GetType(TypeID ID) {
       T = Context.SingletonId; \
       break;
 #include "clang/Basic/WebAssemblyReferenceTypes.def"
+#define AMDGPU_TYPE(Name, Id, SingletonId) \
+    case PREDEF_TYPE_##Id##_ID: \
+      T = Context.SingletonId; \
+      break;
+#include "clang/Basic/AMDGPUTypes.def"
   }
 
   assert(!T.isNull() && "Unknown predefined type");
diff --git a/clang/test/AST/ast-dump-amdgpu-types.c b/clang/test/AST/ast-dump-amdgpu-types.c
new file mode 100644
index 0000000000000..e032d678f1a09
--- /dev/null
+++ b/clang/test/AST/ast-dump-amdgpu-types.c
@@ -0,0 +1,10 @@
+// REQUIRES: amdgpu-registered-target
+// Test without serialization:
+// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s
+//
+// Test with serialization:
+// RUN: %clang_cc1 -triple amdgcn -emit-pch -o %t %s
+// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" | FileCheck %s
+
+// CHECK: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t
+// CHECK-NEXT: `-BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t'
diff --git a/clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c
new file mode 100644
index 0000000000000..c266fa83e4b62
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s
+
+// CHECK: name: "__amdgpu_buffer_rsrc_t",{{.*}}baseType: ![[BT:[0-9]+]]
+// CHECK: [[BT]] = !DICompositeType(tag: DW_TAG_structure_type, name: "__amdgpu_buffer_rsrc_t", {{.*}} flags: DIFlagFwdDecl)
+void test_locals(void) {
__amdgpu_buffer_rsrc_t k; +} diff --git a/clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp new file mode 100644 index 0000000000000..a44e7dc5efe6a --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp @@ -0,0 +1,9 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s + +namespace std { class type_info; }; + +auto &b = typeid(__amdgpu_buffer_rsrc_t); + +// CHECK-DAG: @_ZTSu22__amdgpu_buffer_rsrc_t = {{.*}} c"u22__amdgpu_buffer_rsrc_t\00" +// CHECK-DAG: @_ZTIu22__amdgpu_buffer_rsrc_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu22__amdgpu_buffer_rsrc_t diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl new file mode 100644 index 0000000000000..69dabda08fba6 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl @@ -0,0 +1,82 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature + // REQUIRES: amdgpu-registered-target + // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s + +typedef struct AA_ty { + int x; + __amdgpu_buffer_rsrc_t r; +} AA; + +AA getAA(void *p); +__amdgpu_buffer_rsrc_t getBufferImpl(void *p); +void consumeBuffer(__amdgpu_buffer_rsrc_t); + +// CHECK-LABEL: define {{[^@]+}}@getBuffer +// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret ptr addrspace(8) [[CALL]] +// +__amdgpu_buffer_rsrc_t getBuffer(void *p) { + return getBufferImpl(p); +} + +// CHECK-LABEL: define {{[^@]+}}@consumeBufferPtr +// CHECK-SAME: (ptr addrspace(5) noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(5) [[P]], addrspacecast (ptr null to ptr addrspace(5)) +// CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +// CHECK: if.then: +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: br label [[IF_END]] +// CHECK: if.end: +// CHECK-NEXT: ret void +// +void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { + if (p) + consumeBuffer(*p); +} + +// CHECK-LABEL: define {{[^@]+}}@test +// CHECK-SAME: (ptr addrspace(5) noundef readonly [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5)) +// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]] +// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]] +// CHECK: if.then.i: +// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[A]], i32 16 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[TBAA4]] +// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: br label [[IF_END]] +// CHECK: if.end: +// CHECK-NEXT: ret void +// +void 
test(AA *a) {
+  if (a->x)
+    consumeBufferPtr(&(a->r));
+}
+
+// CHECK-LABEL: define {{[^@]+}}@bar
+// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CALL:%.*]] = tail call [[STRUCT_AA_TY:%.*]] @[[GETAA:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr addrspace(5) noundef [[P]]) #[[ATTR2]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_AA_TY]] [[CALL]], 0
+// CHECK-NEXT: [[CALL_I:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2]]
+// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0
+// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[TEST_EXIT:%.*]], label [[IF_THEN_I_I:%.*]]
+// CHECK: if.then.i.i:
+// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[CALL_I]]) #[[ATTR2]]
+// CHECK-NEXT: br label [[TEST_EXIT]]
+// CHECK: test.exit:
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_AA_TY]] [[CALL]], ptr addrspace(8) [[CALL_I]], 1
+// CHECK-NEXT: ret [[STRUCT_AA_TY]] [[DOTFCA_1_INSERT]]
+//
+AA bar(void *p) {
+  AA a = getAA(p);
+  a.r = getBuffer(p);
+  test(&a);
+  return a;
+}
diff --git a/clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp b/clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp
new file mode 100644
index 0000000000000..80c4c519c4e6b
--- /dev/null
+++ b/clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+  int n = 100;
+  __amdgpu_buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_buffer_rsrc_t' with an rvalue of type 'int'}}
+  static_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}}
+  dynamic_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{invalid target type '__amdgpu_buffer_rsrc_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
+  reinterpret_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}}
+  int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_buffer_rsrc_t'}}
+  __amdgpu_buffer_rsrc_t k;
+  int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'int *'}}
+  void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'void *'}}
+}
+
+static_assert(sizeof(__amdgpu_buffer_rsrc_t) == 16, "wrong size");
+static_assert(alignof(__amdgpu_buffer_rsrc_t) == 16, "wrong alignment");
diff --git a/clang/test/SemaHIP/amdgpu-buffer-rsrc.hip b/clang/test/SemaHIP/amdgpu-buffer-rsrc.hip
new file mode 100644
index 0000000000000..3e5b22dc8963d
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-buffer-rsrc.hip
@@ -0,0 +1,20 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
+
+#define __device__ __attribute__((device))
+
+__device__ void foo() {
+  int n = 100;
+  __amdgpu_buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_buffer_rsrc_t' with an rvalue of type 'int'}}
+  static_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}}
+  dynamic_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{invalid target type '__amdgpu_buffer_rsrc_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
+  reinterpret_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}}
+  int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_buffer_rsrc_t'}}
+  __amdgpu_buffer_rsrc_t k;
+  int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'int *'}}
+  void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'void *'}}
+}
+
+static_assert(sizeof(__amdgpu_buffer_rsrc_t) == 16, "wrong size");
+static_assert(alignof(__amdgpu_buffer_rsrc_t) == 16, "wrong alignment");
diff --git a/clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl
new file mode 100644
index 0000000000000..2d74835699c6d
--- /dev/null
+++ b/clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -Wno-unused-value %s
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -Wno-unused-value %s
+
+void foo() {
+  int n = 100;
+  __amdgpu_buffer_rsrc_t v = 0; // expected-error {{initializing '__private __amdgpu_buffer_rsrc_t' with an expression of incompatible type 'int'}}
+  int c = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_buffer_rsrc_t'}}
+  __amdgpu_buffer_rsrc_t k;
+  int *ip = (int *)k; // expected-error {{operand of type '__amdgpu_buffer_rsrc_t' where arithmetic or pointer type is required}}
+  void *vp = (void *)k; // expected-error {{operand of type '__amdgpu_buffer_rsrc_t' where arithmetic or pointer type is required}}
+ }
diff --git a/clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp
new file mode 100644
index 0000000000000..eb6ded229a75c
--- /dev/null
+++ b/clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s
+
+void foo() {
+#pragma omp target
+  {
+    int n = 100;
+    __amdgpu_buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_buffer_rsrc_t' with an rvalue of type 'int'}}
+    static_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}}
+    dynamic_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{invalid target type '__amdgpu_buffer_rsrc_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
+    reinterpret_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}}
+    int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_buffer_rsrc_t'}}
+    __amdgpu_buffer_rsrc_t k;
+    int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'int *'}}
+    void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'void *'}}
+  }
+ }
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index dcd9555e1bfcc..35312e3d2ae70 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ 
b/clang/tools/libclang/CIndex.cpp @@ -1643,6 +1643,8 @@ bool CursorVisitor::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" #define BUILTIN_TYPE(Id, SingletonId) #define SIGNED_TYPE(Id, SingletonId) case BuiltinType::Id: #define UNSIGNED_TYPE(Id, SingletonId) case BuiltinType::Id: diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index a517de5af00dc..0b470db86748f 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -93,7 +93,8 @@ uptr FindDynamicShadowStart() { # endif return MapDynamicShadow(shadow_size_bytes, ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void AsanApplyToGlobals(globals_op_fptr op, const void *needle) { diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index b250f796e165f..bfc349223258b 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -51,7 +51,8 @@ bool IsSystemHeapAddress (uptr addr) { return false; } uptr FindDynamicShadowStart() { return MapDynamicShadow(MemToShadowSize(kHighMemEnd), ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } // No-op. Mac does not support static linkage anyway. diff --git a/compiler-rt/lib/asan/asan_premap_shadow.cpp b/compiler-rt/lib/asan/asan_premap_shadow.cpp index bed2f62a22511..6e08b8f966507 100644 --- a/compiler-rt/lib/asan/asan_premap_shadow.cpp +++ b/compiler-rt/lib/asan/asan_premap_shadow.cpp @@ -33,7 +33,8 @@ uptr PremapShadowSize() { // PremapShadowSize() bytes on the right of it are mapped r/o. uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), /*mmap_alignment_scale*/ 3, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } bool PremapShadowFailed() { diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp index cda1f7a91e140..09a13b11cff1f 100644 --- a/compiler-rt/lib/asan/asan_win.cpp +++ b/compiler-rt/lib/asan/asan_win.cpp @@ -268,7 +268,8 @@ void PlatformTSDDtor(void *tsd) { AsanThread::TSDDtor(tsd); } // ---------------------- Various stuff ---------------- {{{ uptr FindDynamicShadowStart() { return MapDynamicShadow(MemToShadowSize(kHighMemEnd), ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void AsanCheckDynamicRTPrereqs() {} diff --git a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp index 7642ba6c0bf08..48bc3b631ac07 100644 --- a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp +++ b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp @@ -36,15 +36,20 @@ decltype(__hwasan_shadow)* __hwasan_premap_shadow(); namespace __hwasan { +// We cannot call anything in libc here (see comment above), so we need to +// assume the biggest allowed page size. +// Android max page size is defined as 16k here: +// https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/page.h#41 +static constexpr uptr kMaxGranularity = 16384; + // Conservative upper limit. 
static uptr PremapShadowSize() {
-  return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale,
-                   GetMmapGranularity());
+  return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, kMaxGranularity);
 }
 
 static uptr PremapShadow() {
   return MapDynamicShadow(PremapShadowSize(), kShadowScale,
-                          kShadowBaseAlignment, kHighMemEnd);
+                          kShadowBaseAlignment, kHighMemEnd, kMaxGranularity);
 }
 
 static bool IsPremapShadowAvailable() {
@@ -56,7 +61,7 @@ static bool IsPremapShadowAvailable() {
 }
 
 static uptr FindPremappedShadowStart(uptr shadow_size_bytes) {
-  const uptr granularity = GetMmapGranularity();
+  const uptr granularity = kMaxGranularity;
   const uptr shadow_start = reinterpret_cast<uptr>(&__hwasan_shadow);
   const uptr premap_shadow_size = PremapShadowSize();
   const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity);
@@ -109,7 +114,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
   if (IsPremapShadowAvailable())
     return FindPremappedShadowStart(shadow_size_bytes);
   return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment,
-                          kHighMemEnd);
+                          kHighMemEnd, kMaxGranularity);
 }
 
 }  // namespace __hwasan
@@ -135,7 +140,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
     RingBufferSize());
 # endif
   return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment,
-                          kHighMemEnd);
+                          kHighMemEnd, GetMmapGranularity());
 }
 
 }  // namespace __hwasan
diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp
index 26a2b456b874e..fbe5d250f840b 100644
--- a/compiler-rt/lib/memprof/memprof_linux.cpp
+++ b/compiler-rt/lib/memprof/memprof_linux.cpp
@@ -58,7 +58,8 @@ void InitializePlatformExceptionHandlers() {}
 uptr FindDynamicShadowStart() {
   uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd);
   return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE,
-                          /*min_shadow_base_alignment*/ 0, kHighMemEnd);
+                          /*min_shadow_base_alignment*/ 0, kHighMemEnd,
+                          GetMmapGranularity());
 }
 
 void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
index c451fc962c529..2d1059140c303 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
@@ -60,14 +60,10 @@ inline int Verbosity() {
   return atomic_load(&current_verbosity, memory_order_relaxed);
 }
 
-#if SANITIZER_ANDROID
-inline uptr GetPageSize() {
-// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array.
-  return 4096;
-}
-inline uptr GetPageSizeCached() {
-  return 4096;
-}
+#if SANITIZER_ANDROID && !defined(__aarch64__)
+// 32-bit Android only has 4k pages.
+inline uptr GetPageSize() { return 4096; }
+inline uptr GetPageSizeCached() { return 4096; }
 #else
 uptr GetPageSize();
 extern uptr PageSizeCached;
@@ -77,6 +73,7 @@ inline uptr GetPageSizeCached() {
   return PageSizeCached;
 }
 #endif
+
 uptr GetMmapGranularity();
 uptr GetMaxVirtualAddress();
 uptr GetMaxUserVirtualAddress();
@@ -91,6 +88,7 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
 
 // Memory management
 void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false);
+
 inline void *MmapOrDieQuietly(uptr size, const char *mem_type) {
   return MmapOrDie(size, mem_type, /*raw_report*/ true);
 }
@@ -139,7 +137,8 @@ void UnmapFromTo(uptr from, uptr to);
 // shadow_size_bytes bytes on the right, which on linux is mapped no access.
 // The high_mem_end may be updated if the original shadow size doesn't fit.
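The `MapDynamicShadow` declaration that follows now takes the mapping granularity as an explicit argument instead of computing it internally. The reason is visible in the hwasan hunk above: the premap path runs from `.preinit_array`, where libc (and therefore `GetMmapGranularity()`) must not be called on Android, so those callers pass the compile-time `kMaxGranularity` while every other caller keeps passing `GetMmapGranularity()`. Over-estimating the granularity is safe as long as the real page size divides it; a standalone sketch makes that concrete (`RoundUpTo` mirrors the sanitizer helper of the same name but is not code from this patch):

```cpp
#include <cstdint>
using uptr = std::uintptr_t;

// Round Size up to a multiple of Boundary (Boundary is a power of two).
constexpr uptr RoundUpTo(uptr Size, uptr Boundary) {
  return (Size + Boundary - 1) & ~(Boundary - 1);
}

// Anything aligned for the conservative 16 KiB granularity is automatically
// aligned for 4 KiB pages as well, since 4 KiB divides 16 KiB; the only cost
// of over-estimating is some extra reserved address space.
static_assert(RoundUpTo(12345, 16384) % 4096 == 0,
              "16 KiB-rounded sizes stay 4 KiB-page-aligned");
```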
uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end); + uptr min_shadow_base_alignment, uptr &high_mem_end, + uptr granularity); // Let S = max(shadow_size, num_aliases * alias_size, ring_buffer_size). // Reserves 2*S bytes of address space to the right of the returned address and diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 5d2dd3a7a658f..d15caa76efb06 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1136,7 +1136,7 @@ uptr GetMaxUserVirtualAddress() { return addr; } -# if !SANITIZER_ANDROID +# if !SANITIZER_ANDROID || defined(__aarch64__) uptr GetPageSize() { # if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \ defined(EXEC_PAGESIZE) @@ -1155,7 +1155,7 @@ uptr GetPageSize() { return sysconf(_SC_PAGESIZE); // EXEC_PAGESIZE may not be trustworthy. # endif } -# endif // !SANITIZER_ANDROID +# endif uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) { # if SANITIZER_SOLARIS diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 6d05411222d9e..175362183fd78 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -995,9 +995,8 @@ void UnmapFromTo(uptr from, uptr to) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, - UNUSED uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 24e3d1112520e..cbdf3e95925bf 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -1188,8 +1188,8 @@ uptr GetMaxVirtualAddress() { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index 4e5ad8e4693b4..0b198890fc798 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -384,9 +384,8 @@ bool DontDumpShadowMemory(uptr addr, uptr length) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, - UNUSED uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index d8c9f5bcfcaf6..9a8e53be388b7 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h 
@@ -391,10 +391,11 @@ template class MapAllocatorCache { return true; } if (O == Option::MaxCacheEntriesCount) { - const u32 MaxCount = static_cast(Value); - if (MaxCount > Config::getEntriesArraySize()) + if (Value < 0) return false; - atomic_store_relaxed(&MaxEntriesCount, MaxCount); + atomic_store_relaxed( + &MaxEntriesCount, + Min(static_cast(Value), Config::getEntriesArraySize())); return true; } if (O == Option::MaxCacheEntrySize) { diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 8f0250e88ebf3..af69313214ea6 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -192,9 +192,9 @@ TEST_F(MapAllocatorTest, SecondaryIterate) { TEST_F(MapAllocatorTest, SecondaryOptions) { // Attempt to set a maximum number of entries higher than the array size. - EXPECT_FALSE( - Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); - // A negative number will be cast to a scudo::u32, and fail. + EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); + + // Attempt to set an invalid (negative) number of entries EXPECT_FALSE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, -1)); if (Allocator->canCache(0U)) { // Various valid combinations. diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 378a5fca03264..ea56a20633f0c 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -450,12 +450,12 @@ struct ExtractSubstringHelper { template static std::optional visit(const Designator &e) { - return std::visit([](auto &&s) { return visit(s); }, e.u); + return common::visit([](auto &&s) { return visit(s); }, e.u); } template static std::optional visit(const Expr &e) { - return std::visit([](auto &&s) { return visit(s); }, e.u); + return common::visit([](auto &&s) { return visit(s); }, e.u); } }; diff --git a/flang/include/flang/Lower/DumpEvaluateExpr.h b/flang/include/flang/Lower/DumpEvaluateExpr.h index c67df245359e3..88f53e96a81c2 100644 --- a/flang/include/flang/Lower/DumpEvaluateExpr.h +++ b/flang/include/flang/Lower/DumpEvaluateExpr.h @@ -68,7 +68,7 @@ class DumpEvaluateExpr { } template void show(const std::variant &u) { - std::visit([&](const auto &v) { show(v); }, u); + Fortran::common::visit([&](const auto &v) { show(v); }, u); } template void show(const std::vector &x) { diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h index c2b600c6b5d9b..7f1b93c564b4c 100644 --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -76,7 +76,7 @@ class ReferenceVariantBase { } template constexpr auto visit(VISITOR &&visitor) const { - return std::visit( + return Fortran::common::visit( common::visitors{[&visitor](auto ref) { return visitor(ref.get()); }}, u); } @@ -494,7 +494,8 @@ struct Variable { /// Is this variable a global? bool isGlobal() const { - return std::visit([](const auto &x) { return x.isGlobal(); }, var); + return Fortran::common::visit([](const auto &x) { return x.isGlobal(); }, + var); } /// Is this a module or submodule variable? 
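The mechanical `std::visit` to `Fortran::common::visit` rewrites in this and the remaining files all follow one pattern: `common::visit` (declared in flang's `Common/visit.h`) has the same call shape as `std::visit` but is built from template recursion over the alternative index, which that header advertises as cheaper to compile than the standard library machinery for flang's very large variants. A self-contained sketch of the call shape (the variant and lambda are illustrative; the include path is an assumption based on that header's location):

```cpp
#include <string>
#include <variant>

#include "flang/Common/visit.h" // provides Fortran::common::visit

// Same observable behavior as std::visit: apply the callable to whichever
// alternative is currently active.
std::string describe(const std::variant<int, double> &v) {
  return Fortran::common::visit(
      [](const auto &x) { return std::to_string(x); }, v);
}
```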
@@ -504,7 +505,7 @@ struct Variable { } const Fortran::semantics::Scope *getOwningScope() const { - return std::visit( + return Fortran::common::visit( common::visitors{ [](const Nominal &x) { return &x.symbol->GetUltimate().owner(); }, [](const AggregateStore &agg) { return &agg.getOwningScope(); }}, diff --git a/flang/include/flang/Lower/Support/Utils.h b/flang/include/flang/Lower/Support/Utils.h index e791f3dbb221a..1cc74521e22d8 100644 --- a/flang/include/flang/Lower/Support/Utils.h +++ b/flang/include/flang/Lower/Support/Utils.h @@ -69,7 +69,8 @@ static Fortran::lower::SomeExpr ignoreEvConvert(const A &x) { inline Fortran::lower::SomeExpr ignoreEvConvert(const Fortran::evaluate::Expr> &x) { - return std::visit([](const auto &v) { return ignoreEvConvert(v); }, x.u); + return Fortran::common::visit( + [](const auto &v) { return ignoreEvConvert(v); }, x.u); } /// Zip two containers of the same size together and flatten the pairs. `flatZip @@ -119,7 +120,8 @@ class HashEvaluateExpr { return 0u; } static unsigned getHashValue(const Fortran::evaluate::Subscript &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue(const Fortran::evaluate::Triplet &x) { return getHashValue(x.lower()) - getHashValue(x.upper()) * 5u - @@ -154,7 +156,8 @@ class HashEvaluateExpr { return getHashValue(x.GetComponent()) * 13u; } static unsigned getHashValue(const Fortran::evaluate::DataRef &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue(const Fortran::evaluate::ComplexPart &x) { return getHashValue(x.complex()) - static_cast(x.part()); @@ -247,8 +250,9 @@ class HashEvaluateExpr { return getHashValue(sym.get()); } static unsigned getHashValue(const Fortran::evaluate::Substring &x) { - return 61u * std::visit([&](const auto &p) { return getHashValue(p); }, - x.parent()) - + return 61u * + Fortran::common::visit( + [&](const auto &p) { return getHashValue(p); }, x.parent()) - getHashValue(x.lower()) - (getHashValue(x.lower()) + 1u); } static unsigned @@ -270,7 +274,8 @@ class HashEvaluateExpr { } static unsigned getHashValue(const Fortran::evaluate::ProcedureDesignator &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue(const Fortran::evaluate::ProcedureRef &x) { unsigned args = 13u; @@ -321,15 +326,18 @@ class HashEvaluateExpr { } template static unsigned getHashValue(const Fortran::evaluate::Expr &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue( const Fortran::evaluate::Relational &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } template static unsigned getHashValue(const Fortran::evaluate::Designator &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } template static unsigned @@ -378,7 +386,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::Subscript &x, const 
Fortran::evaluate::Subscript &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::Triplet &x, @@ -411,7 +419,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::DataRef &x, const Fortran::evaluate::DataRef &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::ComplexPart &x, @@ -499,7 +507,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::Substring &x, const Fortran::evaluate::Substring &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &p, const auto &q) { return isEqual(p, q); }, x.parent(), y.parent()) && isEqual(x.lower(), y.lower()) && isEqual(x.upper(), y.upper()); @@ -529,7 +537,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::ProcedureDesignator &x, const Fortran::evaluate::ProcedureDesignator &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::ProcedureRef &x, @@ -591,19 +599,19 @@ class IsEqualEvaluateExpr { template static bool isEqual(const Fortran::evaluate::Expr &x, const Fortran::evaluate::Expr &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::Relational &x, const Fortran::evaluate::Relational &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } template static bool isEqual(const Fortran::evaluate::Designator &x, const Fortran::evaluate::Designator &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } template diff --git a/flang/include/flang/Optimizer/Support/Matcher.h b/flang/include/flang/Optimizer/Support/Matcher.h index da1d7c21f42c4..44672d3c0a072 100644 --- a/flang/include/flang/Optimizer/Support/Matcher.h +++ b/flang/include/flang/Optimizer/Support/Matcher.h @@ -13,6 +13,7 @@ #ifndef FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H #define FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H +#include "flang/Common/idioms.h" #include // Boilerplate CRTP class for a simplified type-casing syntactic sugar. This @@ -23,10 +24,10 @@ template struct matches : Ts... { using Ts::operator()...; }; template matches(Ts...) -> matches; template struct matcher { template auto match(Ts... ts) { - return std::visit(matches{ts...}, static_cast(this)->matchee()); + return Fortran::common::visit(matches{ts...}, static_cast(this)->matchee()); } template auto match(Ts... 
ts) const { - return std::visit(matches{ts...}, static_cast(this)->matchee()); + return Fortran::common::visit(matches{ts...}, static_cast(this)->matchee()); } }; // clang-format on diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index ace316174a892..1bba541e8e14a 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -2936,7 +2936,7 @@ static bool CheckForNonPositiveValues(FoldingContext &context, if (arg.Rank() > 0) { if (const Expr *expr{arg.UnwrapExpr()}) { if (const auto *intExpr{std::get_if>(&expr->u)}) { - std::visit( + Fortran::common::visit( [&](const auto &kindExpr) { using IntType = typename std::decay_t::Result; if (const auto *constArray{ diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 068f5d25967c9..77e02898ac9fb 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -350,10 +350,10 @@ class AllocateStmtHelper { void visitAllocateOptions() { for (const auto &allocOption : std::get>(stmt.t)) - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::StatOrErrmsg &statOrErr) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::StatVariable &statVar) { statExpr = Fortran::semantics::GetExpr(statVar); @@ -898,15 +898,16 @@ void Fortran::lower::genDeallocateStmt( const Fortran::lower::SomeExpr *errMsgExpr = nullptr; for (const Fortran::parser::StatOrErrmsg &statOrErr : std::get>(stmt.t)) - std::visit(Fortran::common::visitors{ - [&](const Fortran::parser::StatVariable &statVar) { - statExpr = Fortran::semantics::GetExpr(statVar); - }, - [&](const Fortran::parser::MsgVariable &errMsgVar) { - errMsgExpr = Fortran::semantics::GetExpr(errMsgVar); - }, - }, - statOrErr.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::StatVariable &statVar) { + statExpr = Fortran::semantics::GetExpr(statVar); + }, + [&](const Fortran::parser::MsgVariable &errMsgVar) { + errMsgExpr = Fortran::semantics::GetExpr(errMsgVar); + }, + }, + statOrErr.u); ErrorManager errorManager; errorManager.init(converter, loc, statExpr, errMsgExpr); fir::FirOpBuilder &builder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 24a57812ba104..423c418889a00 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -302,7 +302,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { bool hasMainProgram = false; const Fortran::semantics::Symbol *globalOmpRequiresSymbol = nullptr; for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](Fortran::lower::pft::FunctionLikeUnit &f) { if (f.isMainProgram()) @@ -336,7 +336,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { // Primary translation pass. 
for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](Fortran::lower::pft::FunctionLikeUnit &f) { lowerFunc(f); }, [&](Fortran::lower::pft::ModuleLikeUnit &m) { lowerMod(m); }, @@ -2062,7 +2062,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { handleLocalitySpecs(info); for (const auto *dir : dirs) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::CompilerDirective::VectorAlways &d) { addLoopAnnotationAttr(info); }, @@ -2433,7 +2433,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { } void genFIR(const Fortran::parser::ForallAssignmentStmt &stmt) { - std::visit([&](const auto &x) { genFIR(x); }, stmt.u); + Fortran::common::visit([&](const auto &x) { genFIR(x); }, stmt.u); } void genFIR(const Fortran::parser::EndForallStmt &) { @@ -2494,7 +2494,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { forall.t)); for (const Fortran::parser::ForallBodyConstruct &s : std::get>(forall.t)) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::WhereConstruct &b) { genFIR(b); }, [&](const Fortran::common::Indirection< @@ -2617,7 +2617,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { void genFIR(const Fortran::parser::CompilerDirective &dir) { Fortran::lower::pft::Evaluation &eval = getEval(); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::CompilerDirective::VectorAlways &) { attachDirectiveToLoop(dir, &eval); @@ -3198,7 +3198,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { const auto &rank = std::get( rankCaseStmt->t); assert(e->block && "missing SelectRankCaseStmt block"); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::ScalarIntConstantExpr &rankExpr) { blockList.emplace_back(e->block); @@ -3229,9 +3229,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { "selector should not yet be set"); Fortran::lower::StatementContext &stmtCtx = activeConstructStack.back().stmtCtx; - const Fortran::lower::SomeExpr *selectorExpr = - std::visit([](const auto &x) { return Fortran::semantics::GetExpr(x); }, - std::get(selectRankStmt.t).u); + const Fortran::lower::SomeExpr *selectorExpr = Fortran::common::visit( + [](const auto &x) { return Fortran::semantics::GetExpr(x); }, + std::get(selectRankStmt.t).u); assert(selectorExpr && "failed to retrieve selector expr"); hlfir::Entity selector = Fortran::lower::convertExprToHLFIR( loc, *this, *selectorExpr, localSymbols, stmtCtx); @@ -3663,7 +3663,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::parser::Label errLabel{}; bool hasIostat{}; for (const auto &spec : specList) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::EndLabel &label) { endLabel = label.v; @@ -4373,7 +4373,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { void genAssignment(const Fortran::evaluate::Assignment &assign) { mlir::Location loc = toLocation(); if (lowerToHighLevelFIR()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Assignment::Intrinsic &) { genDataAssignment(assign, /*userDefinedAssignment=*/nullptr); @@ -4401,7 +4401,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { explicitIterSpace.genLoopNest(); } Fortran::lower::StatementContext stmtCtx; - std::visit( + 
Fortran::common::visit( Fortran::common::visitors{ // [1] Plain old assignment. [&](const Fortran::evaluate::Assignment::Intrinsic &) { @@ -4670,7 +4670,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { } } void genFIR(const Fortran::parser::WhereBodyConstruct &body) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::Statement< Fortran::parser::AssignmentStmt> &stmt) { @@ -5386,18 +5386,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { // The intrinsic module scope, if present, is the first scope. const Fortran::semantics::Scope *intrinsicModuleScope = nullptr; for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) { - std::visit(Fortran::common::visitors{ - [&](Fortran::lower::pft::FunctionLikeUnit &f) { - intrinsicModuleScope = &f.getScope().parent(); - }, - [&](Fortran::lower::pft::ModuleLikeUnit &m) { - intrinsicModuleScope = &m.getScope().parent(); - }, - [&](Fortran::lower::pft::BlockDataUnit &b) {}, - [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {}, - [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {}, - }, - u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](Fortran::lower::pft::FunctionLikeUnit &f) { + intrinsicModuleScope = &f.getScope().parent(); + }, + [&](Fortran::lower::pft::ModuleLikeUnit &m) { + intrinsicModuleScope = &m.getScope().parent(); + }, + [&](Fortran::lower::pft::BlockDataUnit &b) {}, + [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {}, + [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {}, + }, + u); if (intrinsicModuleScope) { while (!intrinsicModuleScope->IsGlobal()) intrinsicModuleScope = &intrinsicModuleScope->parent(); @@ -5531,7 +5532,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { analyzeExplicitSpace(lhs); analyzeExplicitSpace(rhs); }; - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::ProcedureRef &procRef) { // Ensure the procRef expressions are the one being visited. 
@@ -5549,7 +5550,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { explicitIterSpace.endAssign(); } void analyzeExplicitSpace(const Fortran::parser::ForallAssignmentStmt &stmt) { - std::visit([&](const auto &s) { analyzeExplicitSpace(s); }, stmt.u); + Fortran::common::visit([&](const auto &s) { analyzeExplicitSpace(s); }, + stmt.u); } void analyzeExplicitSpace(const Fortran::parser::AssignmentStmt &s) { analyzeExplicitSpace(s.typedAssignment->v.operator->()); @@ -5594,13 +5596,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { analyzeExplicitSpace(e); } void analyzeExplicitSpace(const Fortran::parser::WhereBodyConstruct &body) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::common::Indirection< - Fortran::parser::WhereConstruct> &wc) { - analyzeExplicitSpace(wc.value()); - }, - [&](const auto &s) { analyzeExplicitSpace(s.statement); }}, - body.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::common::Indirection< + Fortran::parser::WhereConstruct> &wc) { + analyzeExplicitSpace(wc.value()); + }, + [&](const auto &s) { analyzeExplicitSpace(s.statement); }}, + body.u); } void analyzeExplicitSpace(const Fortran::parser::MaskedElsewhereStmt &stmt) { const Fortran::lower::SomeExpr *exp = Fortran::semantics::GetExpr( @@ -5651,16 +5654,17 @@ class FirConverter : public Fortran::lower::AbstractConverter { .statement); for (const Fortran::parser::ForallBodyConstruct &s : std::get>(forall.t)) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::common::Indirection< - Fortran::parser::ForallConstruct> &b) { - analyzeExplicitSpace(b.value()); - }, - [&](const Fortran::parser::WhereConstruct &w) { - analyzeExplicitSpace(w); - }, - [&](const auto &b) { analyzeExplicitSpace(b.statement); }}, - s.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::common::Indirection< + Fortran::parser::ForallConstruct> &b) { + analyzeExplicitSpace(b.value()); + }, + [&](const Fortran::parser::WhereConstruct &w) { + analyzeExplicitSpace(w); + }, + [&](const auto &b) { analyzeExplicitSpace(b.statement); }}, + s.u); } analyzeExplicitSpacePop(); } @@ -5715,7 +5719,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { std::string getConstantExprManglePrefix(mlir::Location loc, const Fortran::lower::SomeExpr &expr, mlir::Type eleTy) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> std::string { using T = std::decay_t; if constexpr (Fortran::common::HasMember< @@ -5730,7 +5734,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { fir::emitFatalError(loc, "non a constant derived type expression"); } else { - return std::visit( + return Fortran::common::visit( [&](const auto &someKind) -> std::string { using T = std::decay_t; using TK = Fortran::evaluate::Type(pair); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const auto &dummy) { const auto &entity = getDataObjectEntity(std::get<1>(pair)); @@ -877,7 +877,7 @@ class Fortran::lower::CallInterfaceImpl { for (auto pair : llvm::zip(procedure.dummyArguments, argumentEntities)) { const Fortran::evaluate::characteristics::DummyArgument &argCharacteristics = std::get<0>(pair); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::characteristics::DummyDataObject &dummy) { diff --git a/flang/lib/Lower/ComponentPath.cpp b/flang/lib/Lower/ComponentPath.cpp index d20ea23153102..5bdbca6062e6d 100644 --- a/flang/lib/Lower/ComponentPath.cpp +++ 
b/flang/lib/Lower/ComponentPath.cpp @@ -36,7 +36,7 @@ void Fortran::lower::ComponentPath::clear() { bool Fortran::lower::isRankedArrayAccess(const Fortran::evaluate::ArrayRef &x) { for (const Fortran::evaluate::Subscript &sub : x.subscript()) { - if (std::visit( + if (Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Triplet &) { return true; }, [&](const Fortran::evaluate::IndirectSubscriptIntegerExpr &e) { diff --git a/flang/lib/Lower/ConvertArrayConstructor.cpp b/flang/lib/Lower/ConvertArrayConstructor.cpp index 341fad9a5e43c..3c43cd20eb080 100644 --- a/flang/lib/Lower/ConvertArrayConstructor.cpp +++ b/flang/lib/Lower/ConvertArrayConstructor.cpp @@ -438,7 +438,7 @@ class ArrayCtorLoweringStrategy { void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity value) { - return std::visit( + return Fortran::common::visit( [&](auto &impl) { return impl.pushValue(loc, builder, value); }, implVariant); } @@ -446,7 +446,7 @@ class ArrayCtorLoweringStrategy { mlir::Value startImpliedDo(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value lower, mlir::Value upper, mlir::Value stride) { - return std::visit( + return Fortran::common::visit( [&](auto &impl) { return impl.startImpliedDo(loc, builder, lower, upper, stride); }, @@ -455,13 +455,13 @@ class ArrayCtorLoweringStrategy { hlfir::Entity finishArrayCtorLowering(mlir::Location loc, fir::FirOpBuilder &builder) { - return std::visit( + return Fortran::common::visit( [&](auto &impl) { return impl.finishArrayCtorLowering(loc, builder); }, implVariant); } void startImpliedDoScope(llvm::StringRef doName, mlir::Value indexValue) { - std::visit( + Fortran::common::visit( [&](auto &impl) { return impl.startImpliedDoScope(doName, indexValue); }, @@ -469,8 +469,8 @@ class ArrayCtorLoweringStrategy { } void endImpliedDoScope() { - std::visit([&](auto &impl) { return impl.endImpliedDoScope(); }, - implVariant); + Fortran::common::visit([&](auto &impl) { return impl.endImpliedDoScope(); }, + implVariant); } private: @@ -612,16 +612,17 @@ ArrayCtorAnalysis::ArrayCtorAnalysis( arrayValueListStack.pop_back_val(); for (const Fortran::evaluate::ArrayConstructorValue &acValue : *currentArrayValueList) - std::visit(Fortran::common::visitors{ - [&](const Fortran::evaluate::ImpliedDo &impledDo) { - arrayValueListStack.push_back(&impledDo.values()); - localNumberOfImpliedDo++; - }, - [&](const Fortran::evaluate::Expr &expr) { - localNumberOfExpr++; - anyArrayExpr = anyArrayExpr || expr.Rank() > 0; - }}, - acValue.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::evaluate::ImpliedDo &impledDo) { + arrayValueListStack.push_back(&impledDo.values()); + localNumberOfImpliedDo++; + }, + [&](const Fortran::evaluate::Expr &expr) { + localNumberOfExpr++; + anyArrayExpr = anyArrayExpr || expr.Rank() > 0; + }}, + acValue.u); anyImpliedDo = anyImpliedDo || localNumberOfImpliedDo > 0; if (localNumberOfImpliedDo == 0) { @@ -765,7 +766,7 @@ static void genAcValue(mlir::Location loc, impliedDoIndexValue); for (const auto &acValue : impledDo.values()) - std::visit( + Fortran::common::visit( [&](const auto &x) { genAcValue(loc, converter, x, symMap, stmtCtx, arrayBuilder); }, @@ -787,7 +788,7 @@ hlfir::EntityWithAttributes Fortran::lower::ArrayConstructorBuilder::gen( loc, converter, arrayCtorExpr, symMap, stmtCtx); // Run the array lowering strategy through the ac-values. 
for (const auto &acValue : arrayCtorExpr) - std::visit( + Fortran::common::visit( [&](const auto &x) { genAcValue(loc, converter, x, symMap, stmtCtx, arrayBuilder); }, diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index b1dc41f3ca838..65a2ffbea5dd1 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -935,7 +935,8 @@ struct CallCleanUp { mlir::Value mustFree; }; void genCleanUp(mlir::Location loc, fir::FirOpBuilder &builder) { - std::visit([&](auto &c) { c.genCleanUp(loc, builder); }, cleanUp); + Fortran::common::visit([&](auto &c) { c.genCleanUp(loc, builder); }, + cleanUp); } std::variant cleanUp; }; diff --git a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp index a4ace40a3a1c4..3361817ee27ee 100644 --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -105,7 +105,7 @@ class DenseGlobalBuilder { const Fortran::lower::SomeExpr &initExpr, cuf::DataAttributeAttr dataAttr) { DenseGlobalBuilder globalBuilder; - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Expr & x) { globalBuilder.tryConvertingToAttributes(builder, x); }, @@ -164,7 +164,7 @@ class DenseGlobalBuilder { template void tryConvertingToAttributes(fir::FirOpBuilder &builder, const Fortran::evaluate::Expr &expr) { - std::visit( + Fortran::common::visit( [&](const auto &x) { using TR = Fortran::evaluate::ResultType; if (const auto *constant = @@ -796,7 +796,7 @@ static fir::ExtendedValue genConstantValue(Fortran::lower::AbstractConverter &converter, mlir::Location loc, const Fortran::lower::SomeExpr &constantExpr) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> fir::ExtendedValue { using T = std::decay_t; if constexpr (Fortran::common::HasMember< @@ -805,7 +805,7 @@ genConstantValue(Fortran::lower::AbstractConverter &converter, Fortran::common::TypeCategory::Derived) { return genConstantValue(converter, loc, x); } else { - return std::visit( + return Fortran::common::visit( [&](const auto &preciseKind) { return genConstantValue(converter, loc, preciseKind); }, diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 9567685aa3d2e..9937e9d159886 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -398,8 +398,8 @@ static bool isParenthesizedVariable(const Fortran::evaluate::Expr &expr) { return Fortran::evaluate::IsVariable(parentheses->left()); return false; } else { - return std::visit([&](const auto &x) { return isParenthesizedVariable(x); }, - expr.u); + return Fortran::common::visit( + [&](const auto &x) { return isParenthesizedVariable(x); }, expr.u); } } @@ -646,7 +646,7 @@ isOptimizableTranspose(Fortran::evaluate::Expr expr, if (!isTransposeOptEnabled(converter)) return false; - return std::visit( + return Fortran::common::visit( [&](const auto &e) { return isOptimizableTranspose(e, converter); }, expr.u); } @@ -696,7 +696,7 @@ class ScalarExprLowering { // - result of NULL() or NULL(MOLD) intrinsic. // NULL() requires some context to be lowered, so it is not handled // here and must be lowered according to the context where it appears. 
- ExtValue exv = std::visit( + ExtValue exv = Fortran::common::visit( [&](const auto &x) { return genMutableBoxValueImpl(x); }, expr.u); const fir::MutableBoxValue *mutableBox = exv.getBoxOf(); @@ -737,7 +737,7 @@ class ScalarExprLowering { template ExtValue genMutableBoxValueImpl(const Fortran::evaluate::Designator &designator) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::SymbolRef &sym) -> ExtValue { return converter.getSymbolExtendedValue(*sym, &symMap); @@ -754,8 +754,8 @@ class ScalarExprLowering { template ExtValue genMutableBoxValueImpl(const Fortran::evaluate::Expr &expr) { - return std::visit([&](const auto &x) { return genMutableBoxValueImpl(x); }, - expr.u); + return Fortran::common::visit( + [&](const auto &x) { return genMutableBoxValueImpl(x); }, expr.u); } mlir::Location getLoc() { return location; } @@ -1222,7 +1222,8 @@ class ScalarExprLowering { ExtValue genval(const Fortran::evaluate::Relational &op) { - return std::visit([&](const auto &x) { return genval(x); }, op.u); + return Fortran::common::visit([&](const auto &x) { return genval(x); }, + op.u); } template &list) { if (!getLastSym(cmpt).test(Fortran::semantics::Symbol::Flag::ParentComp)) list.push_front(&cmpt); - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Component &x) { if (Fortran::semantics::IsAllocatableOrPointer(getLastSym(x))) @@ -1713,11 +1716,12 @@ class ScalarExprLowering { template ExtValue gen(const Fortran::evaluate::Designator &des) { - return std::visit([&](const auto &x) { return gen(x); }, des.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, des.u); } template ExtValue genval(const Fortran::evaluate::Designator &des) { - return std::visit([&](const auto &x) { return genval(x); }, des.u); + return Fortran::common::visit([&](const auto &x) { return genval(x); }, + des.u); } mlir::Type genType(const Fortran::evaluate::DynamicType &dt) { @@ -2900,8 +2904,8 @@ class ScalarExprLowering { } template bool isTransformationalRef(Fortran::evaluate::Expr expr) { - return std::visit([&](const auto &e) { return isTransformationalRef(e); }, - expr.u); + return Fortran::common::visit( + [&](const auto &e) { return isTransformationalRef(e); }, expr.u); } template @@ -2914,11 +2918,13 @@ class ScalarExprLowering { /// value, so it may be possible to avoid making a temporary. 
template ExtValue asArrayArg(const Fortran::evaluate::Expr &x) { - return std::visit([&](const auto &e) { return asArrayArg(e, x); }, x.u); + return Fortran::common::visit( + [&](const auto &e) { return asArrayArg(e, x); }, x.u); } template ExtValue asArrayArg(const Fortran::evaluate::Expr &x, const B &y) { - return std::visit([&](const auto &e) { return asArrayArg(e, y); }, x.u); + return Fortran::common::visit( + [&](const auto &e) { return asArrayArg(e, y); }, x.u); } template ExtValue asArrayArg(const Fortran::evaluate::Designator &, const B &x) { @@ -2956,7 +2962,8 @@ class ScalarExprLowering { if (isScalar(x) || Fortran::evaluate::UnwrapWholeSymbolOrComponentDataRef(x) || (isTransformationalRef(x) && !isOptimizableTranspose(x, converter))) - return std::visit([&](const auto &e) { return genref(e); }, x.u); + return Fortran::common::visit([&](const auto &e) { return genref(e); }, + x.u); if (useBoxArg) return asArrayArg(x); return asArray(x); @@ -2967,7 +2974,8 @@ class ScalarExprLowering { return val; if (isScalar(x) || Fortran::evaluate::UnwrapWholeSymbolDataRef(x) || inInitializer) - return std::visit([&](const auto &e) { return genval(e); }, x.u); + return Fortran::common::visit([&](const auto &e) { return genval(e); }, + x.u); return asArray(x); } @@ -2976,7 +2984,8 @@ class ScalarExprLowering { Fortran::common::TypeCategory::Logical, KIND>> &exp) { if (mlir::Value val = getIfOverridenExpr(exp)) return val; - return std::visit([&](const auto &e) { return genval(e); }, exp.u); + return Fortran::common::visit([&](const auto &e) { return genval(e); }, + exp.u); } using RefSet = @@ -3462,7 +3471,7 @@ class ArrayExprLowering { ExtValue lowerBoxedArrayExpr(const Fortran::lower::SomeExpr &exp) { PushSemantics(ConstituentSemantics::BoxValue); - return std::visit( + return Fortran::common::visit( [&](const auto &e) { auto f = genarr(e); ExtValue exv = f(IterationSpace{}); @@ -3824,28 +3833,29 @@ class ArrayExprLowering { fir::factory::getExtents(loc, builder, exv); mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); for (auto ss : llvm::enumerate(x.subscript())) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::evaluate::Triplet &trip) { - // For a subscript of triple notation, we compute the - // range of this dimension of the iteration space. - auto lo = [&]() { - if (auto optLo = trip.lower()) - return fir::getBase(asScalar(*optLo)); - return getLBound(exv, ss.index(), one); - }(); - auto hi = [&]() { - if (auto optHi = trip.upper()) - return fir::getBase(asScalar(*optHi)); - return getUBound(exv, ss.index(), one); - }(); - auto step = builder.createConvert( - loc, idxTy, fir::getBase(asScalar(trip.stride()))); - auto extent = builder.genExtentFromTriplet(loc, lo, hi, - step, idxTy); - destShape.push_back(extent); - }, - [&](auto) {}}, - ss.value().u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::evaluate::Triplet &trip) { + // For a subscript of triple notation, we compute the + // range of this dimension of the iteration space. 
+ auto lo = [&]() { + if (auto optLo = trip.lower()) + return fir::getBase(asScalar(*optLo)); + return getLBound(exv, ss.index(), one); + }(); + auto hi = [&]() { + if (auto optHi = trip.upper()) + return fir::getBase(asScalar(*optHi)); + return getUBound(exv, ss.index(), one); + }(); + auto step = builder.createConvert( + loc, idxTy, fir::getBase(asScalar(trip.stride()))); + auto extent = + builder.genExtentFromTriplet(loc, lo, hi, step, idxTy); + destShape.push_back(extent); + }, + [&](auto) {}}, + ss.value().u); } return true; } @@ -3855,8 +3865,8 @@ class ArrayExprLowering { return genShapeFromDataRef(x.GetComponent()); } bool genShapeFromDataRef(const Fortran::evaluate::DataRef &x) { - return std::visit([&](const auto &v) { return genShapeFromDataRef(v); }, - x.u); + return Fortran::common::visit( + [&](const auto &v) { return genShapeFromDataRef(v); }, x.u); } /// When in an explicit space, the ranked component must be evaluated to @@ -3890,7 +3900,7 @@ class ArrayExprLowering { TODO(getLoc(), "polymorphic array expression lowering with vector subscript"); - return std::visit( + return Fortran::common::visit( [&](const auto &e) { return lowerArrayExpression(genarr(e), resTy); }, exp.u); } @@ -5012,10 +5022,12 @@ class ArrayExprLowering { LLVM_DEBUG(Fortran::lower::DumpEvaluateExpr::dump(llvm::dbgs(), x)); if (isArray(x) || (explicitSpaceIsActive() && isLeftHandSide()) || isElementalProcWithArrayArgs(x)) - return std::visit([&](const auto &e) { return genarr(e); }, x.u); + return Fortran::common::visit([&](const auto &e) { return genarr(e); }, + x.u); if (explicitSpaceIsActive()) { assert(!isArray(x) && !isLeftHandSide()); - auto cc = std::visit([&](const auto &e) { return genarr(e); }, x.u); + auto cc = + Fortran::common::visit([&](const auto &e) { return genarr(e); }, x.u); auto result = cc(IterationSpace{}); return [=](IterSpace) { return result; }; } @@ -5289,7 +5301,8 @@ class ArrayExprLowering { static Fortran::lower::SomeExpr ignoreEvConvert(const Fortran::evaluate::Expr> &x) { - return std::visit([&](const auto &v) { return ignoreEvConvert(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return ignoreEvConvert(v); }, x.u); } template static Fortran::lower::SomeExpr ignoreEvConvert( @@ -5310,8 +5323,8 @@ class ArrayExprLowering { template static const Fortran::semantics::Symbol * extractSubscriptSymbol(const Fortran::evaluate::Expr &x) { - return std::visit([&](const auto &v) { return extractSubscriptSymbol(v); }, - x.u); + return Fortran::common::visit( + [&](const auto &v) { return extractSubscriptSymbol(v); }, x.u); } template static const Fortran::semantics::Symbol * @@ -5420,7 +5433,7 @@ class ArrayExprLowering { std::size_t shapeIndex = 0; for (auto sub : llvm::enumerate(x.subscript())) { const std::size_t subsIndex = sub.index(); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Triplet &t) { mlir::Value lowerBound; @@ -6034,8 +6047,8 @@ class ArrayExprLowering { /// Substrings (see 9.4.1) CC genarr(const Fortran::evaluate::Substring &x, ComponentPath &components) { components.substring = &x; - return std::visit([&](const auto &v) { return genarr(v, components); }, - x.parent()); + return Fortran::common::visit( + [&](const auto &v) { return genarr(v, components); }, x.parent()); } template @@ -6333,7 +6346,7 @@ class ArrayExprLowering { stmtCtx.pushScope(); std::optional charLen; for (const Fortran::evaluate::ArrayConstructorValue &acv : x.values()) { - auto [exv, copyNeeded] = std::visit( + auto [exv, 
copyNeeded] = Fortran::common::visit( [&](const auto &v) { return genArrayCtorInitializer(v, resTy, mem, buffPos, buffSize, stmtCtx); @@ -6417,7 +6430,7 @@ class ArrayExprLowering { // Populate the buffer with the elements, growing as necessary. std::optional charLen; for (const auto &expr : x) { - auto [exv, copyNeeded] = std::visit( + auto [exv, copyNeeded] = Fortran::common::visit( [&](const auto &e) { return genArrayCtorInitializer(e, resTy, mem, buffPos, buffSize, stmtCtx); @@ -6582,22 +6595,24 @@ class ArrayExprLowering { } CC genarr( const Fortran::evaluate::Relational &r) { - return std::visit([&](const auto &x) { return genarr(x); }, r.u); + return Fortran::common::visit([&](const auto &x) { return genarr(x); }, + r.u); } template CC genarr(const Fortran::evaluate::Designator &des) { ComponentPath components(des.Rank() > 0); - return std::visit([&](const auto &x) { return genarr(x, components); }, - des.u); + return Fortran::common::visit( + [&](const auto &x) { return genarr(x, components); }, des.u); } /// Is the path component rank > 0? static bool ranked(const PathComponent &x) { - return std::visit(Fortran::common::visitors{ - [](const ImplicitSubscripts &) { return false; }, - [](const auto *v) { return v->Rank() > 0; }}, - x); + return Fortran::common::visit( + Fortran::common::visitors{ + [](const ImplicitSubscripts &) { return false; }, + [](const auto *v) { return v->Rank() > 0; }}, + x); } void extendComponent(Fortran::lower::ComponentPath &component, @@ -6653,7 +6668,7 @@ class ArrayExprLowering { : nextPathSemantics()); unsigned index = 0; for (const auto &v : llvm::reverse(revPath)) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const ImplicitSubscripts &) { prefix = false; @@ -6678,7 +6693,7 @@ class ArrayExprLowering { unsigned ssIndex = 0u; llvm::SmallVector componentsToAdd; for (const auto &ss : x->subscript()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate:: IndirectSubscriptIntegerExpr &ie) { @@ -7099,8 +7114,8 @@ class ArrayExprLowering { } CC genarr(const Fortran::evaluate::DataRef &x, ComponentPath &components) { - return std::visit([&](const auto &v) { return genarr(v, components); }, - x.u); + return Fortran::common::visit( + [&](const auto &v) { return genarr(v, components); }, x.u); } bool pathIsEmpty(const ComponentPath &components) { @@ -7575,13 +7590,13 @@ void Fortran::lower::createArrayLoads( }; if (esp.lhsBases[counter]) { auto &base = *esp.lhsBases[counter]; - auto load = std::visit(genLoad, base); + auto load = Fortran::common::visit(genLoad, base); esp.initialArgs.push_back(load); esp.resetInnerArgs(); esp.bindLoad(base, load); } for (const auto &base : esp.rhsBases[counter]) - esp.bindLoad(base, std::visit(genLoad, base)); + esp.bindLoad(base, Fortran::common::visit(genLoad, base)); } void Fortran::lower::createArrayMergeStores( diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp index 9035856eabfe7..1933f38f735b5 100644 --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -75,7 +75,7 @@ class HlfirDesignatorBuilder { hlfir::EntityWithAttributes gen(const CharacterDesignators &designatorVariant, bool vectorSubscriptDesignatorToValue = true) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { return genLeafPartRef(x, vectorSubscriptDesignatorToValue); }, @@ -88,7 +88,7 @@ class HlfirDesignatorBuilder { hlfir::EntityWithAttributes 
gen(const RealDesignators &designatorVariant, bool vectorSubscriptDesignatorToValue = true) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { return genLeafPartRef(x, vectorSubscriptDesignatorToValue); }, @@ -101,7 +101,7 @@ class HlfirDesignatorBuilder { hlfir::EntityWithAttributes gen(const OtherDesignators &designatorVariant, bool vectorSubscriptDesignatorToValue = true) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { return genLeafPartRef(x, vectorSubscriptDesignatorToValue); }, @@ -169,7 +169,7 @@ class HlfirDesignatorBuilder { fir::FortranVariableOpInterface gen(const Fortran::evaluate::DataRef &dataRef) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{[&](const auto &x) { return gen(x); }}, dataRef.u); } @@ -364,7 +364,7 @@ class HlfirDesignatorBuilder { fir::FortranVariableOpInterface gen(const Fortran::evaluate::Substring &substring) { PartInfo partInfo; - mlir::Type baseStringType = std::visit( + mlir::Type baseStringType = Fortran::common::visit( [&](const auto &x) { return visit(x, partInfo); }, substring.parent()); assert(partInfo.typeParams.size() == 1 && "expect base string length"); // Compute the substring lower and upper bound. @@ -436,8 +436,8 @@ class HlfirDesignatorBuilder { mlir::Type visit(const Fortran::evaluate::DataRef &dataRef, PartInfo &partInfo) { - return std::visit([&](const auto &x) { return visit(x, partInfo); }, - dataRef.u); + return Fortran::common::visit( + [&](const auto &x) { return visit(x, partInfo); }, dataRef.u); } mlir::Type @@ -892,7 +892,7 @@ hlfir::EntityWithAttributes HlfirDesignatorBuilder::genDesignatorExpr( bool vectorSubscriptDesignatorToValue) { // Expr plumbing to unwrap Designator and call // gen(Designator.u). 
- return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { using T = std::decay_t; if constexpr (Fortran::common::HasMember< @@ -904,7 +904,7 @@ hlfir::EntityWithAttributes HlfirDesignatorBuilder::genDesignatorExpr( .u, vectorSubscriptDesignatorToValue); } else { - return std::visit( + return Fortran::common::visit( [&](const auto &preciseKind) { using TK = typename std::decay_t::Result; @@ -1426,7 +1426,8 @@ class HlfirBuilder { return hlfir::EntityWithAttributes{match->second}; } } - return std::visit([&](const auto &x) { return gen(x); }, expr.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, + expr.u); } private: @@ -1594,7 +1595,7 @@ class HlfirBuilder { hlfir::EntityWithAttributes gen(const Fortran::evaluate::Relational &op) { - return std::visit([&](const auto &x) { return gen(x); }, op.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, op.u); } hlfir::EntityWithAttributes gen(const Fortran::evaluate::TypeParamInquiry &) { diff --git a/flang/lib/Lower/ConvertType.cpp b/flang/lib/Lower/ConvertType.cpp index e6557d7f0b767..f64f6c93541a3 100644 --- a/flang/lib/Lower/ConvertType.cpp +++ b/flang/lib/Lower/ConvertType.cpp @@ -212,7 +212,7 @@ struct TypeBuilderImpl { } mlir::Type genTypelessExprType(const Fortran::lower::SomeExpr &expr) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::BOZLiteralConstant &) -> mlir::Type { return mlir::NoneType::get(context); diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index 48b090f6d2dbe..f0af5f982c14f 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -836,7 +836,7 @@ struct PeelConvert { static Fortran::semantics::MaybeExpr visit_with_category( const Fortran::evaluate::Expr> &expr) { - return std::visit( + return Fortran::common::visit( [](auto &&s) { return visit_with_category(s); }, expr.u); } @@ -859,12 +859,12 @@ struct PeelConvert { static Fortran::semantics::MaybeExpr visit(const Fortran::evaluate::Expr> &expr) { - return std::visit([](auto &&s) { return visit_with_category(s); }, - expr.u); + return Fortran::common::visit( + [](auto &&s) { return visit_with_category(s); }, expr.u); } static Fortran::semantics::MaybeExpr visit(const Fortran::evaluate::Expr &expr) { - return std::visit([](auto &&s) { return visit(s); }, expr.u); + return Fortran::common::visit([](auto &&s) { return visit(s); }, expr.u); } template // static Fortran::semantics::MaybeExpr visit(const T &) { diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index 97ef991cb3990..9e98b230b676f 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -1388,7 +1388,7 @@ static void threadSpecs(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); for (const auto &spec : specList) { makeNextConditionalOn(builder, loc, checkResult, ok); - ok = std::visit( + ok = Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::IoControlSpec::Size &x) -> mlir::Value { // Size must be queried after the related READ runtime calls, not @@ -1425,7 +1425,7 @@ ConditionSpecInfo lowerErrorSpec(Fortran::lower::AbstractConverter &converter, ConditionSpecInfo csi; const Fortran::lower::SomeExpr *ioMsgExpr = nullptr; for (const auto &spec : specList) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::StatVariable &var) { csi.ioStatExpr = 
Fortran::semantics::GetExpr(var); @@ -2397,7 +2397,7 @@ lowerIdExpr(Fortran::lower::AbstractConverter &converter, mlir::Location loc, const std::list &ispecs, Fortran::lower::StatementContext &stmtCtx) { for (const Fortran::parser::InquireSpec &spec : ispecs) - if (mlir::Value v = std::visit( + if (mlir::Value v = Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::IdExpr &idExpr) { return fir::getBase(converter.genExprValue( @@ -2419,11 +2419,11 @@ static void threadInquire(Fortran::lower::AbstractConverter &converter, mlir::Value idExpr = lowerIdExpr(converter, loc, ispecs, stmtCtx); for (const Fortran::parser::InquireSpec &spec : ispecs) { makeNextConditionalOn(builder, loc, checkResult, ok); - ok = std::visit(Fortran::common::visitors{[&](const auto &x) { - return genInquireSpec(converter, loc, cookie, idExpr, x, - stmtCtx); - }}, - spec.u); + ok = Fortran::common::visit(Fortran::common::visitors{[&](const auto &x) { + return genInquireSpec(converter, loc, cookie, + idExpr, x, stmtCtx); + }}, + spec.u); } } diff --git a/flang/lib/Lower/IterationSpace.cpp b/flang/lib/Lower/IterationSpace.cpp index 6bf310b5cfb76..9303536403837 100644 --- a/flang/lib/Lower/IterationSpace.cpp +++ b/flang/lib/Lower/IterationSpace.cpp @@ -21,14 +21,14 @@ unsigned Fortran::lower::getHashValue( const Fortran::lower::ExplicitIterSpace::ArrayBases &x) { - return std::visit( + return Fortran::common::visit( [&](const auto *p) { return HashEvaluateExpr::getHashValue(*p); }, x); } bool Fortran::lower::isEqual( const Fortran::lower::ExplicitIterSpace::ArrayBases &x, const Fortran::lower::ExplicitIterSpace::ArrayBases &y) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ // Fortran::semantics::Symbol * are the exception here. 
These pointers // have identity; if two Symbol * values are the same (different) then @@ -169,7 +169,7 @@ class ArrayBaseFinder { } template RT find(const std::variant &u) { - return std::visit([&](const auto &v) { return find(v); }, u); + return Fortran::common::visit([&](const auto &v) { return find(v); }, u); } template RT find(const std::vector &x) { @@ -361,22 +361,23 @@ llvm::raw_ostream & Fortran::lower::operator<<(llvm::raw_ostream &s, const Fortran::lower::ExplicitIterSpace &e) { auto dump = [&](const auto &u) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::semantics::Symbol *y) { - s << " " << *y << '\n'; - }, - [&](const Fortran::evaluate::ArrayRef *y) { - s << " "; - if (y->base().IsSymbol()) - s << y->base().GetFirstSymbol(); - else - s << y->base().GetComponent().GetLastSymbol(); - s << '\n'; - }, - [&](const Fortran::evaluate::Component *y) { - s << " " << y->GetLastSymbol() << '\n'; - }}, - u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::semantics::Symbol *y) { + s << " " << *y << '\n'; + }, + [&](const Fortran::evaluate::ArrayRef *y) { + s << " "; + if (y->base().IsSymbol()) + s << y->base().GetFirstSymbol(); + else + s << y->base().GetComponent().GetLastSymbol(); + s << '\n'; + }, + [&](const Fortran::evaluate::Component *y) { + s << " " << y->GetLastSymbol() << '\n'; + }}, + u); }; s << "LHS bases:\n"; for (const std::optional &u : diff --git a/flang/lib/Lower/Mangler.cpp b/flang/lib/Lower/Mangler.cpp index 9a33be318a27d..878ba6dea49b6 100644 --- a/flang/lib/Lower/Mangler.cpp +++ b/flang/lib/Lower/Mangler.cpp @@ -110,7 +110,7 @@ std::string Fortran::lower::mangle::mangleName( return fir::NameUniquer::doVariable(modules, procs, blockId, symbolName); }; - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::semantics::MainProgramDetails &) { return fir::NameUniquer::doProgramEntry().str(); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 4f5da8fb70eba..166fa686cd883 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -46,14 +46,15 @@ static mlir::Location genOperandLocation(Fortran::lower::AbstractConverter &converter, const Fortran::parser::AccObject &accObject) { mlir::Location loc = converter.genUnknownLocation(); - std::visit(Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { - loc = converter.genLocation(designator.source); - }, - [&](const Fortran::parser::Name &name) { - loc = converter.genLocation(name.source); - }}, - accObject.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::Designator &designator) { + loc = converter.genLocation(designator.source); + }, + [&](const Fortran::parser::Name &name) { + loc = converter.genLocation(name.source); + }}, + accObject.u); return loc; } @@ -297,8 +298,8 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -335,8 +336,8 @@ static void 
genDeclareDataOperandOperations( std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -790,8 +791,8 @@ genPrivatizations(const Fortran::parser::AccObjectList &objectList, std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -1364,8 +1365,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -3414,7 +3415,7 @@ static void genGlobalCtors(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); for (const auto &accObject : accObjectList.v) { mlir::Location operandLocation = genOperandLocation(converter, accObject); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::Designator &designator) { if (const auto *name = @@ -3993,7 +3994,7 @@ genACC(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) { mlir::Location loc = converter.genLocation(atomicConstruct.source); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::AccAtomicRead &atomicRead) { Fortran::lower::genOmpAccAtomicRead static SymbolWithDesignator visit(const evaluate::Expr &e) { - return std::visit([](auto &&s) { return visit(s); }, e.u); + return Fortran::common::visit([](auto &&s) { return visit(s); }, e.u); } static void verify(const SymbolWithDesignator &sd) { @@ -112,7 +112,7 @@ struct SymbolAndDesignatorExtractor { SymbolWithDesignator getSymbolAndDesignator(const MaybeExpr &expr) { if (!expr) return SymbolWithDesignator{}; - return std::visit( + return Fortran::common::visit( [](auto &&s) { return SymbolAndDesignatorExtractor::visit(s); }, expr->u); } @@ -278,7 +278,7 @@ DefinedOperator makeDefinedOperator(const parser::DefinedOperator &inp, // clang-format on ); - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::DefinedOpName &s) { return DefinedOperator{ @@ -294,7 +294,7 @@ DefinedOperator makeDefinedOperator(const 
parser::DefinedOperator &inp, ProcedureDesignator makeProcedureDesignator(const parser::ProcedureDesignator &inp, semantics::SemanticsContext &semaCtx) { - return ProcedureDesignator{std::visit( + return ProcedureDesignator{Fortran::common::visit( common::visitors{ [&](const parser::Name &t) { return makeObject(t, semaCtx); }, [&](const parser::ProcComponentRef &t) { @@ -306,7 +306,7 @@ makeProcedureDesignator(const parser::ProcedureDesignator &inp, ReductionOperator makeReductionOperator(const parser::OmpReductionOperator &inp, semantics::SemanticsContext &semaCtx) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::DefinedOperator &s) { return ReductionOperator{makeDefinedOperator(s, semaCtx)}; @@ -366,7 +366,7 @@ Allocate make(const parser::OmpClause::Allocate &inp, using Tuple = decltype(Allocate::t); - return Allocate{std::visit( + return Allocate{Fortran::common::visit( common::visitors{ // simple-modifier [&](const wrapped::AllocateModifier::Allocator &v) -> Tuple { @@ -531,7 +531,7 @@ Depend make(const parser::OmpClause::Depend &inp, // clang-format on ); - return Depend{std::visit( // + return Depend{Fortran::common::visit( // common::visitors{ // Doacross [&](const wrapped::Source &s) -> Variant { @@ -793,7 +793,7 @@ Linear make(const parser::OmpClause::Linear &inp, using Tuple = decltype(Linear::t); - return Linear{std::visit( + return Linear{Fortran::common::visit( common::visitors{ [&](const wrapped::WithModifier &s) -> Tuple { return { @@ -949,7 +949,7 @@ Order make(const parser::OmpClause::Order &inp, auto &t1 = std::get(inp.v.t); auto convert3 = [&](const parser::OmpOrderModifier &s) { - return std::visit( + return Fortran::common::visit( [&](parser::OmpOrderModifier::Kind k) { return convert1(k); }, s.u); }; return Order{ @@ -1212,7 +1212,7 @@ UsesAllocators make(const parser::OmpClause::UsesAllocators &inp, Clause makeClause(const parser::OmpClause &cls, semantics::SemanticsContext &semaCtx) { - return std::visit( + return Fortran::common::visit( [&](auto &&s) { return makeClause(getClauseId(cls), clause::make(s, semaCtx), cls.source); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index aac22f0faad37..7d75e6f67dc1b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2199,7 +2199,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) { - std::visit( + Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompDeclConstruct.u); } @@ -2276,7 +2276,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPStandaloneConstruct &standaloneConstruct) { - std::visit( + Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, standaloneConstruct.u); } @@ -2296,7 +2296,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPAtomicConstruct &atomicConstruct) { - std::visit( + Fortran::common::visit( common::visitors{ [&](const parser::OmpAtomicRead &atomicRead) { mlir::Location loc = converter.genLocation(atomicRead.source); @@ -2487,7 +2487,7 @@ static void genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPConstruct &ompConstruct) { - std::visit( + Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompConstruct.u); } @@ -2649,21 +2649,22 @@ void Fortran::lower::gatherOpenMPDeferredDeclareTargets( const parser::OpenMPDeclarativeConstruct &ompDecl, llvm::SmallVectorImpl &deferredDeclareTarget) { - std::visit(common::visitors{ - [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { - collectDeferredDeclareTargets(converter, semaCtx, eval, - ompReq, deferredDeclareTarget); - }, - [&](const auto &) {}, - }, - ompDecl.u); + Fortran::common::visit( + common::visitors{ + [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { + collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq, + deferredDeclareTarget); + }, + [&](const auto &) {}, + }, + ompDecl.u); } bool Fortran::lower::isOpenMPDeviceDeclareTarget( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDecl) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { mlir::omp::DeclareTargetDeviceType targetType = diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 36d96f37ff36a..8aeef175ad2d2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -325,7 +325,7 @@ void insertChildMapInfoIntoParent( semantics::Symbol *getOmpObjectSymbol(const parser::OmpObject &ompObject) { semantics::Symbol *sym = nullptr; - std::visit( + Fortran::common::visit( common::visitors{ [&](const parser::Designator &designator) { if (auto *arrayEle = diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index fc34895d1ec75..5b3d5471925bf 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -103,7 +103,7 @@ class PFTBuilder { stmt.unwrapped, pftParentStack.back(), stmt.position, stmt.label}); return false; } else if constexpr (std::is_same_v) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const common::Indirection &x) { addEvaluation(lower::pft::Evaluation{ @@ -239,7 +239,7 @@ class PFTBuilder { // Get rid of production wrapper bool Pre(const parser::Statement &statement) { - addEvaluation(std::visit( + addEvaluation(Fortran::common::visit( [&](const auto &x) { return lower::pft::Evaluation{x, pftParentStack.back(), statement.source, statement.label}; @@ -248,7 +248,7 @@ class PFTBuilder { return false; } bool Pre(const parser::WhereBodyConstruct &whereBody) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::Statement &stmt) { // Not caught as other AssignmentStmt because it is not @@ -469,7 +469,7 @@ class PFTBuilder { makeEvaluationAction(const parser::ActionStmt &statement, parser::CharBlock position, std::optional label) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const auto &x) { return lower::pft::Evaluation{ @@ -664,7 +664,7 @@ class PFTBuilder { }; auto analyzeSpecs{[&](const auto &specList) { for (const auto &spec : specList) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::Format &format) { analyzeFormatSpec(format); @@ -1172,26 +1172,27 @@ class PFTDumper { void dumpPFT(llvm::raw_ostream &outputStream, const lower::pft::Program &pft) { for (auto &unit 
: pft.getUnits()) { - std::visit(common::visitors{ - [&](const lower::pft::BlockDataUnit &unit) { - outputStream << getNodeIndex(unit) << " "; - outputStream << "BlockData: "; - outputStream << "\nEnd BlockData\n\n"; - }, - [&](const lower::pft::FunctionLikeUnit &func) { - dumpFunctionLikeUnit(outputStream, func); - }, - [&](const lower::pft::ModuleLikeUnit &unit) { - dumpModuleLikeUnit(outputStream, unit); - }, - [&](const lower::pft::CompilerDirectiveUnit &unit) { - dumpCompilerDirectiveUnit(outputStream, unit); - }, - [&](const lower::pft::OpenACCDirectiveUnit &unit) { - dumpOpenACCDirectiveUnit(outputStream, unit); - }, - }, - unit); + Fortran::common::visit( + common::visitors{ + [&](const lower::pft::BlockDataUnit &unit) { + outputStream << getNodeIndex(unit) << " "; + outputStream << "BlockData: "; + outputStream << "\nEnd BlockData\n\n"; + }, + [&](const lower::pft::FunctionLikeUnit &func) { + dumpFunctionLikeUnit(outputStream, func); + }, + [&](const lower::pft::ModuleLikeUnit &unit) { + dumpModuleLikeUnit(outputStream, unit); + }, + [&](const lower::pft::CompilerDirectiveUnit &unit) { + dumpCompilerDirectiveUnit(outputStream, unit); + }, + [&](const lower::pft::OpenACCDirectiveUnit &unit) { + dumpOpenACCDirectiveUnit(outputStream, unit); + }, + }, + unit); } } diff --git a/flang/lib/Lower/VectorSubscripts.cpp b/flang/lib/Lower/VectorSubscripts.cpp index d7a311d32d59d..389a89ddcf102 100644 --- a/flang/lib/Lower/VectorSubscripts.cpp +++ b/flang/lib/Lower/VectorSubscripts.cpp @@ -55,10 +55,11 @@ class VectorSubscriptBoxBuilder { using Designator = Fortran::evaluate::Designator; if constexpr (Fortran::common::HasMember) { const auto &designator = std::get(expr.u); - return std::visit([&](const auto &x) { return gen(x); }, designator.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, + designator.u); } else { - return std::visit([&](const auto &x) { return genDesignator(x); }, - expr.u); + return Fortran::common::visit( + [&](const auto &x) { return genDesignator(x); }, expr.u); } } @@ -66,8 +67,8 @@ class VectorSubscriptBoxBuilder { // type of X elements. 
mlir::Type gen(const Fortran::evaluate::DataRef &dataRef) { - return std::visit([&](const auto &ref) -> mlir::Type { return gen(ref); }, - dataRef.u); + return Fortran::common::visit( + [&](const auto &ref) -> mlir::Type { return gen(ref); }, dataRef.u); } mlir::Type gen(const Fortran::evaluate::SymbolRef &symRef) { @@ -128,7 +129,7 @@ class VectorSubscriptBoxBuilder { mlir::Type gen(const Fortran::evaluate::ArrayRef &arrayRef) { auto isTripletOrVector = [](const Fortran::evaluate::Subscript &subscript) -> bool { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [](const Fortran::evaluate::IndirectSubscriptIntegerExpr &expr) { return expr.value().Rank() != 0; @@ -165,7 +166,7 @@ class VectorSubscriptBoxBuilder { mlir::Type idxTy = builder.getIndexType(); mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); for (const auto &subscript : llvm::enumerate(arrayRef.subscript())) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::IndirectSubscriptIntegerExpr &expr) { if (expr.value().Rank() == 0) { @@ -327,24 +328,24 @@ Fortran::lower::VectorSubscriptBox::createSlice(fir::FirOpBuilder &builder, mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); auto undef = builder.create(loc, idxTy); for (const LoweredSubscript &subscript : loweredSubscripts) - std::visit(Fortran::common::visitors{ - [&](const LoweredTriplet &triplet) { - triples.emplace_back(triplet.lb); - triples.emplace_back(triplet.ub); - triples.emplace_back(triplet.stride); - }, - [&](const LoweredVectorSubscript &vector) { - triples.emplace_back(one); - triples.emplace_back(vector.size); - triples.emplace_back(one); - }, - [&](const mlir::Value &i) { - triples.emplace_back(i); - triples.emplace_back(undef); - triples.emplace_back(undef); - }, - }, - subscript); + Fortran::common::visit(Fortran::common::visitors{ + [&](const LoweredTriplet &triplet) { + triples.emplace_back(triplet.lb); + triples.emplace_back(triplet.ub); + triples.emplace_back(triplet.stride); + }, + [&](const LoweredVectorSubscript &vector) { + triples.emplace_back(one); + triples.emplace_back(vector.size); + triples.emplace_back(one); + }, + [&](const mlir::Value &i) { + triples.emplace_back(i); + triples.emplace_back(undef); + triples.emplace_back(undef); + }, + }, + subscript); return builder.create(loc, triples, componentPath); } @@ -390,28 +391,28 @@ fir::ExtendedValue Fortran::lower::VectorSubscriptBox::getElementAt( llvm::SmallVector indexes; size_t inductionIdx = inductionVariables.size() - 1; for (const LoweredSubscript &subscript : loweredSubscripts) - std::visit(Fortran::common::visitors{ - [&](const LoweredTriplet &triplet) { - indexes.emplace_back(inductionVariables[inductionIdx--]); - }, - [&](const LoweredVectorSubscript &vector) { - mlir::Value vecIndex = inductionVariables[inductionIdx--]; - mlir::Value vecBase = fir::getBase(vector.vector); - mlir::Type vecEleTy = fir::unwrapSequenceType( - fir::unwrapPassByRefType(vecBase.getType())); - mlir::Type refTy = builder.getRefType(vecEleTy); - auto vecEltRef = builder.create( - loc, refTy, vecBase, vecIndex); - auto vecElt = - builder.create(loc, vecEleTy, vecEltRef); - indexes.emplace_back( - builder.createConvert(loc, idxTy, vecElt)); - }, - [&](const mlir::Value &i) { - indexes.emplace_back(builder.createConvert(loc, idxTy, i)); - }, - }, - subscript); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const LoweredTriplet &triplet) { + indexes.emplace_back(inductionVariables[inductionIdx--]); + 
}, + [&](const LoweredVectorSubscript &vector) { + mlir::Value vecIndex = inductionVariables[inductionIdx--]; + mlir::Value vecBase = fir::getBase(vector.vector); + mlir::Type vecEleTy = fir::unwrapSequenceType( + fir::unwrapPassByRefType(vecBase.getType())); + mlir::Type refTy = builder.getRefType(vecEleTy); + auto vecEltRef = builder.create( + loc, refTy, vecBase, vecIndex); + auto vecElt = + builder.create(loc, vecEleTy, vecEltRef); + indexes.emplace_back(builder.createConvert(loc, idxTy, vecElt)); + }, + [&](const mlir::Value &i) { + indexes.emplace_back(builder.createConvert(loc, idxTy, i)); + }, + }, + subscript); mlir::Type refTy = builder.getRefType(getElementType()); auto elementAddr = builder.create( loc, refTy, fir::getBase(loweredBase), shape, slice, indexes, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ab106f62aecfb..c929d05038462 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -1788,7 +1788,7 @@ IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, llvm::StringRef name = genericName(specificName); if (const IntrinsicHandler *handler = findIntrinsicHandler(name)) { bool outline = handler->outline || outlineAllIntrinsics; - return {std::visit( + return {Fortran::common::visit( [&](auto &generator) -> fir::ExtendedValue { return invokeHandler(generator, *handler, resultType, args, outline, *this); @@ -1802,7 +1802,7 @@ IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, if (fir::getTargetTriple(mod).isPPC()) { if (const IntrinsicHandler *ppcHandler = findPPCIntrinsicHandler(name)) { bool outline = ppcHandler->outline || outlineAllIntrinsics; - return {std::visit( + return {Fortran::common::visit( [&](auto &generator) -> fir::ExtendedValue { return invokeHandler(generator, *ppcHandler, resultType, args, outline, *this); @@ -2136,7 +2136,7 @@ mlir::SymbolRefAttr IntrinsicLibrary::getUnrestrictedIntrinsicSymbolRefAttr( bool loadRefArguments = true; mlir::func::FuncOp funcOp; if (const IntrinsicHandler *handler = findIntrinsicHandler(name)) - funcOp = std::visit( + funcOp = Fortran::common::visit( [&](auto generator) { return getWrapper(generator, name, signature, loadRefArguments); }, diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 70ff8b386ac3d..407ecc8e327b4 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -75,8 +75,8 @@ static mlir::LLVM::DITypeAttr genBasicType(mlir::MLIRContext *context, } static mlir::LLVM::DITypeAttr genPlaceholderType(mlir::MLIRContext *context) { - return genBasicType(context, mlir::StringAttr::get(context, "integer"), 32, - llvm::dwarf::DW_ATE_signed); + return genBasicType(context, mlir::StringAttr::get(context, "integer"), + /*bitSize=*/32, llvm::dwarf::DW_ATE_signed); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType( @@ -145,11 +145,10 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType( elements.push_back(subrangeTy); } return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_array_type, /*recursive id*/ {}, - /* name */ nullptr, /* file */ nullptr, /* line */ 0, - /* scope */ nullptr, elemTy, mlir::LLVM::DIFlags::Zero, - /* sizeInBits */ 0, /*alignInBits*/ 0, elements, dataLocation, - /* rank */ nullptr, allocated, associated); + context, llvm::dwarf::DW_TAG_array_type, 
/*recursive_id=*/{}, + /*name=*/nullptr, /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, + mlir::LLVM::DIFlags::Zero, /*sizeInBits=*/0, /*alignInBits=*/0, elements, + dataLocation, /*rank=*/nullptr, allocated, associated); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( @@ -184,12 +183,11 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( // have been set to some valid default values. return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_array_type, /*recursive id*/ {}, - /* name */ nullptr, /* file */ nullptr, /* line */ 0, /* scope */ nullptr, - elemTy, mlir::LLVM::DIFlags::Zero, /* sizeInBits */ 0, - /*alignInBits*/ 0, elements, /* dataLocation */ nullptr, - /* rank */ nullptr, /* allocated */ nullptr, - /* associated */ nullptr); + context, llvm::dwarf::DW_TAG_array_type, /*recursive_id=*/{}, + /*name=*/nullptr, /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, + mlir::LLVM::DIFlags::Zero, /*sizeInBits=*/0, /*alignInBits=*/0, elements, + /*dataLocation=*/nullptr, /*rank=*/nullptr, /*allocated=*/nullptr, + /*associated=*/nullptr); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 69b9fe17e6a88..25140a0473749 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -403,9 +403,9 @@ void AccStructureChecker::CheckMultipleOccurrenceInDeclare( if (GetContext().directive != llvm::acc::Directive::ACCD_declare) return; for (const auto &object : list.v) { - std::visit( - Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { + common::visit( + common::visitors{ + [&](const parser::Designator &designator) { if (const auto *name = getDesignatorNameIfDataRef(designator)) { if (declareSymbols.contains(&name->symbol->GetUltimate())) { if (declareSymbols[&name->symbol->GetUltimate()] == clause) { @@ -435,7 +435,7 @@ void AccStructureChecker::CheckMultipleOccurrenceInDeclare( declareSymbols.insert({&name->symbol->GetUltimate(), clause}); } }, - [&](const Fortran::parser::Name &name) { + [&](const parser::Name &name) { // TODO: check common block }}, object.u); @@ -674,9 +674,9 @@ void AccStructureChecker::Enter(const parser::AccClause::Reduction &reduction) { const auto &objects{std::get(list.t)}; for (const auto &object : objects.v) { - std::visit( - Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { + common::visit( + common::visitors{ + [&](const parser::Designator &designator) { if (const auto *name = getDesignatorNameIfDataRef(designator)) { const auto *type{name->symbol->GetType()}; if (type->IsNumeric(TypeCategory::Integer) && diff --git a/flang/lib/Semantics/check-coarray.cpp b/flang/lib/Semantics/check-coarray.cpp index 106af7960fa94..6cf61a6b923db 100644 --- a/flang/lib/Semantics/check-coarray.cpp +++ b/flang/lib/Semantics/check-coarray.cpp @@ -93,7 +93,7 @@ static void CheckCoindexedStatOrErrmsg(SemanticsContext &context, } } }}; - std::visit(CoindexedCheck, statOrErrmsg.u); + Fortran::common::visit(CoindexedCheck, statOrErrmsg.u); } static void CheckSyncStatList( diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index e26c19f03f5ab..fdbf7b75e72f4 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -418,15 +418,23 @@ else() libc.src.string.memory_utils.inline_memcpy libc.src.string.memory_utils.inline_memset ) - 
add_entrypoint_object( - malloc - SRCS - freelist_malloc.cpp - HDRS - malloc.h - DEPENDS - .freelist_heap - ) + # Only add malloc in full build mode. Use the system malloc in overlay mode. + if(LLVM_LIBC_FULL_BUILD) + add_entrypoint_object( + malloc + SRCS + freelist_malloc.cpp + HDRS + malloc.h + DEPENDS + .freelist_heap + ) + else() + add_entrypoint_external( + malloc + ) + endif() + add_entrypoint_external( free ) diff --git a/libc/test/IntegrationTest/test.cpp b/libc/test/IntegrationTest/test.cpp index a8b2f2911fd8e..0c961dbafb840 100644 --- a/libc/test/IntegrationTest/test.cpp +++ b/libc/test/IntegrationTest/test.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/common.h" -#include "src/sys/auxv/getauxval.h" #include #include @@ -83,6 +82,8 @@ void *realloc(void *ptr, size_t s) { void *__dso_handle = nullptr; #ifdef LIBC_TARGET_ARCH_IS_AARCH64 +#include "src/sys/auxv/getauxval.h" + // Due to historical reasons, libgcc on aarch64 may expect __getauxval to be // defined. See also https://gcc.gnu.org/pipermail/gcc-cvs/2020-June/300635.html unsigned long __getauxval(unsigned long id) { diff --git a/libc/test/UnitTest/HermeticTestUtils.cpp b/libc/test/UnitTest/HermeticTestUtils.cpp index 6e815e6c8aab0..191e54b7344a6 100644 --- a/libc/test/UnitTest/HermeticTestUtils.cpp +++ b/libc/test/UnitTest/HermeticTestUtils.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/common.h" -#include "src/sys/auxv/getauxval.h" #include #include @@ -111,6 +110,8 @@ void __cxa_pure_virtual() { void *__dso_handle = nullptr; #ifdef LIBC_TARGET_ARCH_IS_AARCH64 +#include "src/sys/auxv/getauxval.h" + // Due to historical reasons, libgcc on aarch64 may expect __getauxval to be // defined. See also https://gcc.gnu.org/pipermail/gcc-cvs/2020-June/300635.html unsigned long __getauxval(unsigned long id) { diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index 648404afb5730..0ded674ee0e12 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -79,20 +79,22 @@ add_libc_test( libc.src.__support.CPP.span ) -add_libc_test( - freelist_heap_test - SUITE - libc-stdlib-tests - SRCS - freelist_heap_test.cpp - freelist_malloc_test.cpp - DEPENDS - libc.src.__support.CPP.span - libc.src.stdlib.freelist_heap - libc.src.stdlib.malloc - libc.src.string.memcmp - libc.src.string.memcpy -) +if(LLVM_LIBC_FULL_BUILD) + add_libc_test( + freelist_heap_test + SUITE + libc-stdlib-tests + SRCS + freelist_heap_test.cpp + freelist_malloc_test.cpp + DEPENDS + libc.src.__support.CPP.span + libc.src.stdlib.freelist_heap + libc.src.stdlib.malloc + libc.src.string.memcmp + libc.src.string.memcpy + ) +endif() add_fp_unittest( strtod_test diff --git a/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h b/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h index 32b101a2f9843..fb5d60ba30d77 100644 --- a/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h +++ b/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h @@ -1,5 +1,4 @@ -//===-- ValueObjectPrinter.h ---------------------------------------*- C++ -//-*-===// +//===-- ValueObjectPrinter.h ------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -25,7 +24,7 @@ class ValueObjectPrinter {
   /// does not retain the ValueObject it is printing, that is the job of
   /// its caller. It also doesn't attempt to track changes in the
   /// ValueObject, e.g. changing synthetic child providers or changing
-  /// dynamic vrs. static vrs. synthetic settings.
+  /// dynamic versus static versus synthetic settings.
 public:
   ValueObjectPrinter(ValueObject &valobj, Stream *s);
 
@@ -42,16 +41,16 @@ class ValueObjectPrinter {
 
   InstancePointersSetSP m_printed_instance_pointers;
 
-  // only this class (and subclasses, if any) should ever be concerned with the
-  // depth mechanism
+  /// Only this class (and subclasses, if any) should ever be
+  /// concerned with the depth mechanism.
   ValueObjectPrinter(ValueObject &valobj, Stream *s,
                      const DumpValueObjectOptions &options,
                      const DumpValueObjectOptions::PointerDepth &ptr_depth,
                      uint32_t curr_depth,
                      InstancePointersSetSP printed_instance_pointers);
 
-  // we should actually be using delegating constructors here but some versions
-  // of GCC still have trouble with those
+  /// We should actually be using delegating constructors here but
+  /// some versions of GCC still have trouble with those.
   void Init(ValueObject &valobj, Stream *s,
             const DumpValueObjectOptions &options,
             const DumpValueObjectOptions::PointerDepth &ptr_depth,
@@ -67,7 +66,7 @@ class ValueObjectPrinter {
   /// use dynamic and use synthetic settings of the ValueObject being printed,
   /// so changes made to these settings won't affect already made
   /// ValueObjectPrinters. SetupMostSpecializedValue();
-
+  ///
   /// Access the cached "most specialized value" - that is the one to use for
   /// printing the value object's value. However, be sure to use
   /// GetValueForChildGeneration when you are generating the children of this
@@ -143,9 +142,9 @@ class ValueObjectPrinter {
   bool ShouldShowName() const;
 
   ValueObject &m_orig_valobj;
-  ValueObject *m_cached_valobj; /// Cache the current "most specialized" value.
-                                /// Don't use this directly, use
-                                /// GetMostSpecializedValue.
+  /// Cache the current "most specialized" value. Don't use this
+  /// directly, use GetMostSpecializedValue.
+  ValueObject *m_cached_valobj;
   Stream *m_stream;
   DumpValueObjectOptions m_options;
   Flags m_type_flags;
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index 48c5f49e78e01..6d45508ccb916 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -63,13 +63,14 @@ def find_python_interpreter():
     if "DYLD_INSERT_LIBRARIES" not in config.environment:
         return None
 
-    # If we're running in a virtual environment, we already have a copy of the
-    # Python executable.
+    # If we're running in a virtual environment, we have to copy Python into
+    # the virtual environment for it to work.
    if sys.prefix != sys.base_prefix:
-        return None
+        copied_python = os.path.join(sys.prefix, "bin", "copied-python")
+    else:
+        copied_python = os.path.join(config.lldb_build_directory, "copied-python")
 
     # Avoid doing any work if we already copied the binary.
-    copied_python = os.path.join(config.lldb_build_directory, "copied-python")
     if os.path.isfile(copied_python):
         return copied_python
 
diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst
index 91b762719138d..9bd2b1d435fd0 100644
--- a/llvm/docs/Security.rst
+++ b/llvm/docs/Security.rst
@@ -44,7 +44,6 @@
 username for an individual isn't available, the brackets will be empty.
* Ed Maste (individual; FreeBSD) [@emaste] * George Burgess IV (Google) [@gburgessiv] * Josh Stone (Red Hat; Rust) [@cuviper] -* Kate McInnes (Apple) [] * Kristof Beyls (ARM) [@kbeyls] * Matthew Riley (Google) [@mmdriley] * Nikhil Gupta (Nvidia) [] diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index f58862e533529..2aa077fe0e035 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -13,6 +13,7 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/PassManager.h" #include @@ -89,6 +90,7 @@ class MLInlineAdvisor : public InlineAdvisor { llvm::SmallPtrSet NodesInLastSCC; DenseSet AllNodes; bool ForceStop = false; + ProfileSummaryInfo &PSI; }; /// InlineAdvice that tracks changes post inlining. For that reason, it only diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e6b69b39911a9..45f1092094572 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1337,27 +1337,9 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic; + +// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+. def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic; -// gfx12+ intrinsic -def int_amdgcn_raw_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - llvm_v4i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; -def int_amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - AMDGPUBufferRsrcTy, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [IntrArgMemOnly, NoCapture>, - ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; class AMDGPUStructBufferAtomic : Intrinsic < [data_ty], @@ -1434,28 +1416,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic; def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic; -// gfx12 intrinsic -def int_amdgcn_struct_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - llvm_v4i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; -def int_amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - AMDGPUBufferRsrcTy, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [IntrArgMemOnly, NoCapture>, - ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; // gfx90a intrinsics def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic; diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 7d51302bcc1ad..3ab0af8d57da1 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -253,7 +253,6 @@ class LoggingMLInlineAdvice : public MLInlineAdvice { }; static const std::vector 
TrainingOnlyFeatures{ - TensorSpec::createSpec(TFFeedPrefix + "inlining_default", {1}), TensorSpec::createSpec(TFFeedPrefix + "discount", {1}), TensorSpec::createSpec(TFFeedPrefix + "reward", {1}), TensorSpec::createSpec(TFFeedPrefix + "step_type", {1})}; diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 75eb8ece2e447..21946572339b9 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" @@ -23,6 +24,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -46,6 +48,14 @@ static cl::opt InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden, cl::desc(InclDefaultMsg)); +enum class SkipMLPolicyCriteria { Never, IfCallerIsNotCold }; + +static cl::opt SkipPolicy( + "ml-inliner-skip-policy", cl::Hidden, cl::init(SkipMLPolicyCriteria::Never), + cl::values(clEnumValN(SkipMLPolicyCriteria::Never, "never", "never"), + clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold, + "if-caller-not-cold", "if the caller is not cold"))); + #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) // codegen-ed file #include "InlinerSizeModel.h" // NOLINT @@ -129,7 +139,8 @@ MLInlineAdvisor::MLInlineAdvisor( M, MAM.getResult(M).getManager()), ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice), CG(MAM.getResult(M)), - InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { + InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize), + PSI(MAM.getResult(M)) { assert(ModelRunner); ModelRunner->switchContext(""); // Extract the 'call site height' feature - the position of a call site @@ -334,6 +345,11 @@ std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &TIR = FAM.getResult(Callee); auto &ORE = FAM.getResult(Caller); + if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) { + if (!PSI.isFunctionEntryCold(&Caller)) + return std::make_unique(this, CB, ORE, + GetDefaultAdvice(CB)); + } auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which diff --git a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py index 4898509ea544f..3846e7a3cee76 100644 --- a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -47,7 +47,6 @@ def get_input_signature(): "edge_count", "callsite_height", "cost_estimate", - "inlining_default", "sroa_savings", "sroa_losses", "load_elimination", @@ -102,12 +101,12 @@ def get_output_spec_path(path): return os.path.join(path, "output_spec.json") -def build_mock_model(path, signature): +def build_mock_model(path, signature, advice): """Build and save the mock model with the given signature""" module = tf.Module() def action(*inputs): - return {signature["output"]: tf.constant(value=1, dtype=tf.int64)} + return 
{signature["output"]: tf.constant(value=advice, dtype=tf.int64)} module.action = tf.function()(action) action = {"action": module.action.get_concrete_function(signature["inputs"])} @@ -128,12 +127,18 @@ def get_signature(): def main(argv): - assert len(argv) == 2 + assert len(argv) == 2 or (len(argv) == 3 and argv[2] == "never") model_path = argv[1] print(f"Output model to: [{argv[1]}]") + + constant_advice = 1 + if len(argv) == 3: + constant_advice = 0 + print(f"The model will always return: {constant_advice}") + signature = get_signature() - build_mock_model(model_path, signature) + build_mock_model(model_path, signature, constant_advice) if __name__ == "__main__": diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 231db188e65dc..d81c18875eebd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -290,7 +290,6 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; -def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 18193d8807597..519e623306eb1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -5564,7 +5564,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) NODE_NAME_CASE(BUFFER_ATOMIC_CSUB) NODE_NAME_CASE(BUFFER_ATOMIC_FADD) - NODE_NAME_CASE(BUFFER_ATOMIC_FADD_BF16) NODE_NAME_CASE(BUFFER_ATOMIC_FMIN) NODE_NAME_CASE(BUFFER_ATOMIC_FMAX) NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 71c4334029b43..206bb46b6c863 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -615,7 +615,6 @@ enum NodeType : unsigned { BUFFER_ATOMIC_CMPSWAP, BUFFER_ATOMIC_CSUB, BUFFER_ATOMIC_FADD, - BUFFER_ATOMIC_FADD_BF16, BUFFER_ATOMIC_FMIN, BUFFER_ATOMIC_FMAX, BUFFER_ATOMIC_COND_SUB_U32, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 973b6b8cce177..0c7b1968e551c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6018,11 +6018,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) { case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD; - case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16: - return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16; case Intrinsic::amdgcn_raw_buffer_atomic_fmin: case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin: case Intrinsic::amdgcn_struct_buffer_atomic_fmin: @@ -7330,10 +7325,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: - case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16: return 
legalizeBufferAtomic(MI, B, IntrID); case Intrinsic::amdgcn_rsq_clamp: return legalizeRsqClampIntrinsic(MI, MRI, B); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 7ebd674757fbc..313d53a1524d2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3079,7 +3079,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: - case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: { applyDefaultMapping(OpdMapper); @@ -4376,7 +4375,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: - case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: { // vdata_out diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index e84d39a2895c8..7b29d573b6101 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -269,7 +269,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -287,7 +286,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -305,7 +303,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -323,7 +320,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 43e5434ea2700..dff19b6a93286 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1419,27 +1419,21 @@ let OtherPredicates = [HasPackedD16VMem] in { defm : MUBUF_LoadIntrinsicPat; } // End HasPackedD16VMem. 
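+// Generate these from the shared register-type lists rather than spelling out
+// every VT: a type added to Reg32Types/Reg64Types/Reg96Types/Reg128Types now
+// picks up the matching buffer load (and, further below, store) patterns
+// automatically.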
-defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; +foreach vt = Reg32Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg64Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg96Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg128Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; @@ -1530,27 +1524,21 @@ let OtherPredicates = [HasPackedD16VMem] in { defm : MUBUF_StoreIntrinsicPat; } // End HasPackedD16VMem. -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; +foreach vt = Reg32Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg64Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg96Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg128Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; @@ -1751,7 +1739,7 @@ let OtherPredicates = [HasAtomicCSubNoRtnInsts] in defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>; let SubtargetPredicate = isGFX12Plus in { - defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd_bf16", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">; + defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">; defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>; let OtherPredicates = [HasAtomicCSubNoRtnInsts] in diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d9a163ded6bab..c436e03806dc8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8833,17 +8833,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_raw_buffer_atomic_fadd: case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd: return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD); - case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16: - return lowerRawBufferAtomicIntrin(Op, DAG, - AMDGPUISD::BUFFER_ATOMIC_FADD_BF16); case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case 
Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD); - case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16: - return lowerStructBufferAtomicIntrin(Op, DAG, - AMDGPUISD::BUFFER_ATOMIC_FADD_BF16); case Intrinsic::amdgcn_raw_buffer_atomic_fmin: case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin: return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN); @@ -15841,7 +15833,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N, case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP: case AMDGPUISD::BUFFER_ATOMIC_CSUB: case AMDGPUISD::BUFFER_ATOMIC_FADD: - case AMDGPUISD::BUFFER_ATOMIC_FADD_BF16: case AMDGPUISD::BUFFER_ATOMIC_FMIN: case AMDGPUISD::BUFFER_ATOMIC_FMAX: // Target-specific read-modify-write atomics are sources of divergence. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 6682763210411..9b9ff4a5d6996 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -222,7 +222,6 @@ defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">; defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; -defm SIbuffer_atomic_fadd_bf16 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD_BF16">; defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ba31027da92e8..e32bb8fec1f54 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3892,7 +3892,6 @@ def G_AMDGPU_BUFFER_ATOMIC_XOR : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_INC : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_DEC : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_FADD : BufferAtomicGenericInstruction; -def G_AMDGPU_BUFFER_ATOMIC_FADD_BF16 : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_FMIN : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_FMAX : BufferAtomicGenericInstruction; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 3666976cf82f8..a8efe2b2ba35e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -586,7 +586,9 @@ class RegisterTypes reg_types> { def Reg16Types : RegisterTypes<[i16, f16, bf16]>; def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>; -def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, v4i16, v4f16, v4bf16, p0]>; +def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>; +def Reg96Types : RegisterTypes<[v3i32, v3f32]>; +def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>; let HasVGPR = 1 in { // VOP3 and VINTERP can access 256 lo and 256 hi registers. 
@@ -744,7 +746,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, let BaseClassOrder = 10000; } -def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32, +def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", Reg128Types.types, 32, (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; @@ -815,7 +817,7 @@ def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v let HasSGPR = 1; } -def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32, +def SGPR_64 : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 1; @@ -905,8 +907,8 @@ multiclass SRegClass; -defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>; +defm "" : SRegClass<3, Reg96Types.types, SGPR_96Regs, TTMP_96Regs>; +defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs>; defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>; defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>; defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>; @@ -958,8 +960,8 @@ multiclass VRegClass regTypes, dag regList> { defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4], (add VGPR_64)>; -defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>; -defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add VGPR_128)>; +defm VReg_96 : VRegClass<3, Reg96Types.types, (add VGPR_96)>; +defm VReg_128 : VRegClass<4, Reg128Types.types, (add VGPR_128)>; defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>; defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>; diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll index 2f29a1a9aa768..9f339af0f5580 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll @@ -321,7 +321,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val, ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92 ; GFX12-GISEL-NEXT: s_nop 0 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-GISEL-NEXT: s_endpgm @@ -339,7 +339,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen ; GFX12-GISEL-NEXT: s_nop 0 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-GISEL-NEXT: s_endpgm @@ -356,7 +356,7 @@ define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> % ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 
%soffset, i32 0) @@ -372,7 +372,7 @@ define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -388,7 +388,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x ; ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -406,7 +406,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 ; ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen ; GFX12-GISEL-NEXT: s_nop 0 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-GISEL-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll index a3b83c346c1bb..f8caf84d5c51a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll @@ -17,10 +17,12 @@ ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-v2f16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2F16-GISEL %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s + +; FIXME: These should fail when bfloat support is handled correctly +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck 
-check-prefix=ERR-RAW-V2BF16-GISEL %s +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s ; Make sure buffer fadd atomics with return values are not selected ; for gfx908 where they do not work. diff --git a/llvm/test/Transforms/Inline/ML/bypass.ll b/llvm/test/Transforms/Inline/ML/bypass.ll new file mode 100644 index 0000000000000..ccdefdcc93bfe --- /dev/null +++ b/llvm/test/Transforms/Inline/ML/bypass.ll @@ -0,0 +1,78 @@ +; REQUIRES: have_tflite +; RUN: rm -rf %t.runfiles %t.tflite %t.model_out +; RUN: mkdir %t.runfiles +; RUN: cp %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t.runfiles +; RUN: cp %S/../../../../lib/Analysis/models/saved-model-to-tflite.py %t.runfiles +; RUN: %python %t.runfiles/gen-inline-oz-test-model.py %t.model_out never +; RUN: %python %t.runfiles/saved-model-to-tflite.py %t.model_out %t.tflite + +; When running O2, we expect both callers to inline callee. +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s --check-prefixes=O2-HOT,O2-COLD + +; The ML model we use always blocks inlining (by construction) +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 \ +; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \ +; RUN: -S | FileCheck %s --check-prefixes=ML-HOT,ML-COLD + +; When bypassing ML for non-cold callers, the hot caller will have its callee inlined, but the cold one won't +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 \ +; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \ +; RUN: -ml-inliner-skip-policy=if-caller-not-cold -S | FileCheck %s --check-prefixes=O2-HOT,ML-COLD + +declare void @extern() + +define i32 @callee(i32 %x) { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + call void @extern() + ret i32 %x3 +} + +define i32 @hot_caller(i32 %y1) !prof !15 { + %y = call i32 @callee(i32 %y1), !prof !16 + ret i32 %y +} + +define i32 @cold_caller(i32 %y1) !prof !17 { + %y = call i32 @callee(i32 %y1), !prof !16 + ret i32 %y +} + + +!llvm.module.flags = !{!1} +!15 = !{!"function_entry_count", i64 300} +!16 = !{!"branch_weights", i64 300} +!17 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} + +; O2-HOT-LABEL: @hot_caller +; O2-HOT-NOT: call i32 @callee +; O2-HOT: call void @extern +; O2-HOT-NEXT: call void @extern +; O2-HOT-NEXT: ret +; O2-COLD-LABEL: @cold_caller +; O2-COLD-NOT: call i32 @callee +; O2-COLD: call void @extern +; O2-COLD-NEXT: call void @extern +; O2-COLD-NEXT: ret + +; 
ML-HOT-LABEL: @hot_caller
+; ML-HOT-NEXT: call i32 @callee
+; ML-COLD-LABEL: @cold_caller
+; ML-COLD-NEXT: call i32 @callee
\ No newline at end of file
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index f1207026704e8..d60d4131bc94b 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -216,6 +216,17 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str
         cf_cmd.append(args.start_rev)
         cf_cmd.append(args.end_rev)
 
+        # Gather the extensions of all modified files and pass them explicitly to git-clang-format.
+        # This prevents git-clang-format from applying its own filtering rules on top of ours.
+        extensions = set()
+        for file in cpp_files:
+            _, ext = os.path.splitext(file)
+            extensions.add(
+                ext.strip(".")
+            )  # Exclude periods since git-clang-format takes extensions without them
+        cf_cmd.append("--extensions")
+        cf_cmd.append("'{}'".format(",".join(extensions)))
+
         cf_cmd.append("--")
         cf_cmd += cpp_files
 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp
index 62887c75c872b..4224925147c84 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp
@@ -34,6 +34,20 @@ convertIterSpaceType(IterSpaceType itSp, SmallVectorImpl<Type> &fields) {
   return success();
 }
 
+static std::optional<LogicalResult>
+convertIteratorType(IteratorType itTp, SmallVectorImpl<Type> &fields) {
+  // The actual Iterator Values (that are updated every iteration).
+  auto idxTp = IndexType::get(itTp.getContext());
+  // TODO: handle batch dimension.
+  assert(itTp.getEncoding().getBatchLvlRank() == 0);
+  if (!itTp.isUnique()) {
+    // Segment high for non-unique iterator.
+    fields.push_back(idxTp);
+  }
+  fields.push_back(idxTp);
+  return success();
+}
+
 namespace {
 
 /// Sparse codegen rule for number of entries operator.
@@ -57,10 +71,114 @@ class ExtractIterSpaceConverter
   }
 };
 
+class SparseIterateOpConverter : public OneToNOpConversionPattern<IterateOp> {
+public:
+  using OneToNOpConversionPattern::OneToNOpConversionPattern;
+  LogicalResult
+  matchAndRewrite(IterateOp op, OpAdaptor adaptor,
+                  OneToNPatternRewriter &rewriter) const override {
+    if (!op.getCrdUsedLvls().empty())
+      return rewriter.notifyMatchFailure(
+          op, "non-empty coordinates list not implemented.");
+
+    Location loc = op.getLoc();
+
+    auto iterSpace = SparseIterationSpace::fromValues(
+        op.getIterSpace().getType(), adaptor.getIterSpace(), 0);
+
+    std::unique_ptr<SparseIterator> it =
+        iterSpace.extractIterator(rewriter, loc);
+
+    if (it->iteratableByFor()) {
+      auto [lo, hi] = it->genForCond(rewriter, loc);
+      Value step = constantIndex(rewriter, loc, 1);
+      SmallVector<Value> ivs;
+      for (ValueRange inits : adaptor.getInitArgs())
+        llvm::append_range(ivs, inits);
+      scf::ForOp forOp = rewriter.create<scf::ForOp>(loc, lo, hi, step, ivs);
+
+      Block *loopBody = op.getBody();
+      OneToNTypeMapping bodyTypeMapping(loopBody->getArgumentTypes());
+      if (failed(typeConverter->convertSignatureArgs(
+              loopBody->getArgumentTypes(), bodyTypeMapping)))
+        return failure();
+      rewriter.applySignatureConversion(loopBody, bodyTypeMapping);
+
+      rewriter.eraseBlock(forOp.getBody());
+      Region &dstRegion = forOp.getRegion();
+      rewriter.inlineRegionBefore(op.getRegion(), dstRegion, dstRegion.end());
+
+      auto yieldOp =
+          llvm::cast<sparse_tensor::YieldOp>(forOp.getBody()->getTerminator());
+
+      rewriter.setInsertionPointToEnd(forOp.getBody());
+      // Replace sparse_tensor.yield with scf.yield.
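+      // The inlined region still ends in the op's own sparse_tensor.yield;
+      // rebuild that terminator as an scf.yield over the same operands, then
+      // erase the stale op.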
+      rewriter.create<scf::YieldOp>(loc, yieldOp.getResults());
+      rewriter.eraseOp(yieldOp);
+
+      const OneToNTypeMapping &resultMapping = adaptor.getResultMapping();
+      rewriter.replaceOp(op, forOp.getResults(), resultMapping);
+    } else {
+      SmallVector<Value> ivs;
+      llvm::append_range(ivs, it->getCursor());
+      for (ValueRange inits : adaptor.getInitArgs())
+        llvm::append_range(ivs, inits);
+
+      assert(llvm::all_of(ivs, [](Value v) { return v != nullptr; }));
+
+      TypeRange types = ValueRange(ivs).getTypes();
+      auto whileOp = rewriter.create<scf::WhileOp>(loc, types, ivs);
+      SmallVector<Location> l(types.size(), op.getIterator().getLoc());
+
+      // Generates loop conditions.
+      Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, types, l);
+      rewriter.setInsertionPointToStart(before);
+      ValueRange bArgs = before->getArguments();
+      auto [whileCond, remArgs] = it->genWhileCond(rewriter, loc, bArgs);
+      assert(remArgs.size() == adaptor.getInitArgs().size());
+      rewriter.create<scf::ConditionOp>(loc, whileCond, before->getArguments());
+
+      // Generates loop body.
+      Block *loopBody = op.getBody();
+      OneToNTypeMapping bodyTypeMapping(loopBody->getArgumentTypes());
+      if (failed(typeConverter->convertSignatureArgs(
+              loopBody->getArgumentTypes(), bodyTypeMapping)))
+        return failure();
+      rewriter.applySignatureConversion(loopBody, bodyTypeMapping);
+
+      Region &dstRegion = whileOp.getAfter();
+      // TODO: handle uses of coordinate!
+      rewriter.inlineRegionBefore(op.getRegion(), dstRegion, dstRegion.end());
+      ValueRange aArgs = whileOp.getAfterArguments();
+      auto yieldOp = llvm::cast<sparse_tensor::YieldOp>(
+          whileOp.getAfterBody()->getTerminator());
+
+      rewriter.setInsertionPointToEnd(whileOp.getAfterBody());
+
+      aArgs = it->linkNewScope(aArgs);
+      ValueRange nx = it->forward(rewriter, loc);
+      SmallVector<Value> yields;
+      llvm::append_range(yields, nx);
+      llvm::append_range(yields, yieldOp.getResults());
+
+      // Replace sparse_tensor.yield with scf.yield.
+      rewriter.eraseOp(yieldOp);
+      rewriter.create<scf::YieldOp>(loc, yields);
+
+      const OneToNTypeMapping &resultMapping = adaptor.getResultMapping();
+      rewriter.replaceOp(
+          op, whileOp.getResults().drop_front(it->getCursor().size()),
+          resultMapping);
+    }
+    return success();
+  }
+};
+
 } // namespace
 
 mlir::SparseIterationTypeConverter::SparseIterationTypeConverter() {
   addConversion([](Type type) { return type; });
+  addConversion(convertIteratorType);
   addConversion(convertIterSpaceType);
 
   addSourceMaterialization([](OpBuilder &builder, IterSpaceType spTp,
@@ -74,5 +192,6 @@ mlir::SparseIterationTypeConverter::SparseIterationTypeConverter() {
 
 void mlir::populateLowerSparseIterationToSCFPatterns(
     TypeConverter &converter, RewritePatternSet &patterns) {
-  patterns.add<ExtractIterSpaceConverter>(converter, patterns.getContext());
+  patterns.add<ExtractIterSpaceConverter, SparseIterateOpConverter>(
+      converter, patterns.getContext());
 }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp
index 05883f1cefdf3..fe0e515a2d180 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp
@@ -542,7 +542,7 @@ std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
   }
   // The remaining block arguments are user-provided reduction values and an
   // optional universal index. Make sure their sizes match.
-  assert(bArgs.size() == reduc.size() + needsUniv ? 1 : 0);
+  assert(bArgs.size() == reduc.size() + needsUniv);
   builder.create<scf::ConditionOp>(loc, whileCond, before->getArguments());
 
   // Generates loop body.
@@ -560,7 +560,7 @@ std::pair<Operation *, Value> LoopEmitter::emitWhileLoopOverTensorsAtLvls(
   }
 
   // In-place update on reduction variable.
-  assert(aArgs.size() == reduc.size() + needsUniv ? 1 : 0);
+  assert(aArgs.size() == reduc.size() + needsUniv);
   for (unsigned i = 0, e = reduc.size(); i < e; i++)
     reduc[i] = aArgs[i];
 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp
index be8e15d6ae6f4..ef95fcc84bd90 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp
@@ -331,6 +331,13 @@ class TrivialIterator : public ConcreteIterator {
   TrivialIterator(const SparseTensorLevel &stl)
       : ConcreteIterator(stl, IterKind::kTrivial, /*itValCnt=*/1) {}
 
+  TrivialIterator(OpBuilder &b, Location l, const SparseTensorLevel &stl,
+                  Value posLo, Value posHi)
+      : ConcreteIterator(stl, IterKind::kTrivial, /*itValCnt=*/1), posLo(posLo),
+        posHi(posHi) {
+    seek(posLo);
+  }
+
   std::string getDebugInterfacePrefix() const override {
     return std::string("trivial<") + stl.toString() + ">";
   }
@@ -420,6 +427,14 @@ class DedupIterator : public ConcreteIterator {
       : ConcreteIterator(stl, IterKind::kDedup, /*itValCnt=*/2) {
     assert(!stl.isUnique());
   }
+
+  DedupIterator(OpBuilder &b, Location l, const SparseTensorLevel &stl,
+                Value posLo, Value posHi)
+      : ConcreteIterator(stl, IterKind::kDedup, /*itValCnt=*/2), posHi(posHi) {
+    assert(!stl.isUnique());
+    seek({posLo, genSegmentHigh(b, l, posLo)});
+  }
+
   // For LLVM-style RTTI.
   static bool classof(const SparseIterator *from) {
     return from->kind == IterKind::kDedup;
@@ -1532,6 +1547,11 @@ SparseIterationSpace mlir::sparse_tensor::SparseIterationSpace::fromValues(
   return space;
 }
 
+std::unique_ptr<SparseIterator>
+SparseIterationSpace::extractIterator(OpBuilder &b, Location l) const {
+  return makeSimpleIterator(b, l, *this);
+}
+
 //===----------------------------------------------------------------------===//
 // SparseIterator factory functions.
 //===----------------------------------------------------------------------===//
@@ -1590,6 +1610,26 @@ sparse_tensor::makeSynLevelAndIterator(Value sz, unsigned tid, unsigned lvl,
   return std::make_pair(std::move(stl), std::move(it));
 }
 
+std::unique_ptr<SparseIterator>
+sparse_tensor::makeSimpleIterator(OpBuilder &b, Location l,
+                                  const SparseIterationSpace &iterSpace) {
+  // assert(iterSpace.getSpaceDim() == 1);
+  std::unique_ptr<SparseIterator> ret;
+  if (!iterSpace.isUnique()) {
+    // We always deduplicate the non-unique level, but we should optimize it
+    // away if possible.
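+    // A DedupIterator carries two values per step (the current position plus
+    // the segment high computed via genSegmentHigh), matching /*itValCnt=*/2
+    // in its constructor, so a run of duplicate coordinates is consumed as a
+    // single logical element.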
+    ret = std::make_unique<DedupIterator>(b, l, iterSpace.getLastLvl(),
+                                          iterSpace.getBoundLo(),
+                                          iterSpace.getBoundHi());
+  } else {
+    ret = std::make_unique<TrivialIterator>(b, l, iterSpace.getLastLvl(),
+                                            iterSpace.getBoundLo(),
+                                            iterSpace.getBoundHi());
+  }
+  ret->setSparseEmitStrategy(SparseEmitStrategy::kFunctional);
+  return ret;
+}
+
 std::unique_ptr<SparseIterator>
 sparse_tensor::makeSimpleIterator(const SparseTensorLevel &stl,
                                   SparseEmitStrategy strategy) {
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h
index 17636af2b2f9d..91f363db93f1d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h
@@ -132,6 +132,10 @@ class SparseIterationSpace {
   Value getBoundLo() const { return bound.first; }
   Value getBoundHi() const { return bound.second; }
 
+  // Extract an iterator to iterate over the sparse iteration space.
+  std::unique_ptr<SparseIterator> extractIterator(OpBuilder &b,
+                                                  Location l) const;
+
 private:
   SmallVector<std::unique_ptr<SparseTensorLevel>> lvls;
   std::pair<Value, Value> bound;
@@ -192,6 +196,13 @@ class SparseIterator {
     crd = nullptr;
   }
 
+  // Reconstructs an iteration space directly from the provided ValueRange.
+  static std::unique_ptr<SparseIterator>
+  fromValues(IteratorType dstTp, ValueRange values, unsigned tid);
+
+  // The inverse operation of `fromValues`.
+  SmallVector<Value> toValues() const { llvm_unreachable("Not implemented"); }
+
   //
   // Iterator properties.
   //
@@ -345,12 +356,21 @@ std::unique_ptr<SparseTensorLevel> makeSparseTensorLevel(OpBuilder &b,
                                                          unsigned tid, Level lvl);
 
-/// Helper function to create a TensorLevel object from given `tensor`.
+/// Helper function to create a TensorLevel object from given ValueRange.
 std::unique_ptr<SparseTensorLevel> makeSparseTensorLevel(LevelType lt, Value sz,
                                                          ValueRange buffers,
                                                          unsigned tid, Level l);
 
-/// Helper function to create a simple SparseIterator object that iterates
-/// over the SparseTensorLevel.
+
+/// Helper function to create a simple SparseIterator object that iterates
+/// over the entire iteration space.
+std::unique_ptr<SparseIterator>
+makeSimpleIterator(OpBuilder &b, Location l,
+                   const SparseIterationSpace &iterSpace);
+
+/// Helper function to create a simple SparseIterator object that iterates
+/// over the sparse tensor level.
+/// TODO: switch to `SparseIterationSpace` (which supports N-D iterators) when
+/// feature-complete.
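+///
+/// Either overload returns an iterator that callers drive the same way (a
+/// sketch; see the IterateOp lowering): genInit positions it, genForCond or
+/// genWhileCond supplies the loop bounds/condition, and forward advances it.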
std::unique_ptr makeSimpleIterator( const SparseTensorLevel &stl, SparseEmitStrategy strategy = SparseEmitStrategy::kFunctional); diff --git a/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir b/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir index 5fcd661bb69b2..77a0e89dc7c81 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s --lower-sparse-iteration-to-scf | FileCheck %s +// RUN: mlir-opt %s --sparse-space-collapse --lower-sparse-iteration-to-scf | FileCheck %s --check-prefix COLLAPSED #COO = #sparse_tensor.encoding<{ map = (i, j) -> ( @@ -7,17 +8,44 @@ ) }> -// CHECK-LABEL: func.func @sparse_1D_space( -// CHECK-SAME: %[[VAL_0:.*]]: tensor) -> !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[LVL_SIZE:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[C0]] : tensor -// CHECK: %[[POS_MEM:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref -// CHECK: %[[CRD_MEM:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref -// CHECK: %[[POS_LO:.*]] = memref.load %[[POS_MEM]]{{\[}}%[[C0]]] : memref -// CHECK: %[[POS_HI:.*]] = memref.load %[[POS_MEM]]{{\[}}%[[C1]]] : memref -// CHECK: %[[ITER_SPACE:.*]] = builtin.unrealized_conversion_cast %[[POS_MEM]], %[[CRD_MEM]], %[[LVL_SIZE]], %[[POS_LO]], %[[POS_HI]] -func.func @sparse_1D_space(%sp : tensor) -> !sparse_tensor.iter_space<#COO, lvls = 0> { - %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor -> !sparse_tensor.iter_space<#COO, lvls = 0> - return %l1 : !sparse_tensor.iter_space<#COO, lvls = 0> +// CHECK-LABEL: @sparse_iteration_to_scf +// // deduplication +// CHECK: scf.while {{.*}} { +// CHECK: } do { +// CHECK: } +// CHECK: scf.while {{.*}} { +// CHECK: } do { +// // actual computation +// CHECK: scf.for {{.*}} { +// CHECK: arith.addi +// CHECK: } +// // deduplication +// CHECK: scf.while {{.*}} { +// CHECK: } do { +// CHECK: } +// CHECK: scf.yield +// CHECK: } +// CHECK: return + +// COLLAPSED-LABEL: @sparse_iteration_to_scf +// COLLAPSED: %[[RET:.*]] = scf.for {{.*}} { +// COLLAPSED: %[[VAL:.*]] = arith.addi +// COLLAPSED: scf.yield %[[VAL]] : index +// COLLAPSED: } +// COLLAPSED: return %[[RET]] : index +func.func @sparse_iteration_to_scf(%sp : tensor<4x8xf32, #COO>) -> index { + %i = arith.constant 0 : index + %c1 = arith.constant 1 : index + %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 + : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0> + %r1 = sparse_tensor.iterate %it1 in %l1 iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index { + %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 + : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1> -> !sparse_tensor.iter_space<#COO, lvls = 1> + %r2 = sparse_tensor.iterate %it2 in %l2 iter_args(%inner = %outer): !sparse_tensor.iter_space<#COO, lvls = 1 to 2> -> index { + %k = arith.addi %inner, %c1 : index + sparse_tensor.yield %k : index + } + sparse_tensor.yield %r2 : index + } + return %r1 : index } diff --git a/mlir/test/mlir-tblgen/rewriter-static-matcher.td b/mlir/test/mlir-tblgen/rewriter-static-matcher.td index 2907923cb28e4..7a84dfd5706b1 100644 --- a/mlir/test/mlir-tblgen/rewriter-static-matcher.td +++ b/mlir/test/mlir-tblgen/rewriter-static-matcher.td @@ -35,6 +35,16 @@ 
def COp : NS_Op<"c_op", []> { let results = (outs AnyInteger); } +def DOp : NS_Op<"d_op", []> { + let arguments = (ins + Variadic:$any_integer + ); + + let results = (outs AnyInteger); +} + +def Foo : NativeCodeCall<"foo($_builder, $0)">; + // Test static matcher for duplicate DagNode // --- @@ -53,3 +63,8 @@ def : Pat<(AOp (BOp I32Attr:$attr, I32:$int)), // CHECK: if(::mlir::failed([[$DAG_MATCHER]](rewriter, op1, tblgen_ops def : Pat<(COp $_, (BOp I32Attr:$attr, I32:$int)), (COp $attr, $int)>; + +// CHECK: auto [[$VAR:.*]] = foo( +// CHECK: ::llvm::SmallVector<::mlir::Value, 4> [[$ARR:tblgen_variadic_values_.*]]; +// CHECK: [[$ARR]].push_back([[$VAR]]); +def : Pat<(AOp $x), (DOp (variadic (Foo $x)))>; diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index d8e16d98fd756..b8a3d5b49938e 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -1261,20 +1261,23 @@ std::string PatternEmitter::handleResultPattern(DagNode resultTree, std::string PatternEmitter::handleVariadic(DagNode tree, int depth) { assert(tree.isVariadic()); + std::string output; + llvm::raw_string_ostream oss(output); auto name = std::string(formatv("tblgen_variadic_values_{0}", nextValueId++)); symbolInfoMap.bindValue(name); - os << "::llvm::SmallVector<::mlir::Value, 4> " << name << ";\n"; + oss << "::llvm::SmallVector<::mlir::Value, 4> " << name << ";\n"; for (int i = 0, e = tree.getNumArgs(); i != e; ++i) { if (auto child = tree.getArgAsNestedDag(i)) { - os << name << ".push_back(" << handleResultPattern(child, i, depth + 1) - << ");\n"; + oss << name << ".push_back(" << handleResultPattern(child, i, depth + 1) + << ");\n"; } else { - os << name << ".push_back(" - << handleOpArgument(tree.getArgAsLeaf(i), tree.getArgName(i)) - << ");\n"; + oss << name << ".push_back(" + << handleOpArgument(tree.getArgAsLeaf(i), tree.getArgName(i)) + << ");\n"; } } + os << oss.str(); return name; } diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index e678213df18ce..e6643d3260eb4 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -58,12 +58,12 @@ #endif #if defined(__has_include) -#if __has_include("hsa/hsa.h") -#include "hsa/hsa.h" -#include "hsa/hsa_ext_amd.h" -#elif __has_include("hsa.h") +#if __has_include("hsa.h") #include "hsa.h" #include "hsa_ext_amd.h" +#elif __has_include("hsa/hsa.h") +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" #endif #else #include "hsa/hsa.h"