diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index bbd7634bcc3bf..5f8580e8752de 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -152,6 +152,11 @@ class alignas(void *) Stmt { LLVM_PREFERRED_TYPE(bool) unsigned HasFPFeatures : 1; + /// True if the compound statement has one or more pragmas that set some + /// atomic options. + LLVM_PREFERRED_TYPE(bool) + unsigned HasAtomicOptions : 1; + unsigned NumStmts; }; @@ -1603,7 +1608,8 @@ class NullStmt : public Stmt { /// CompoundStmt - This represents a group of statements like { stmt stmt }. class CompoundStmt final : public Stmt, - private llvm::TrailingObjects { + private llvm::TrailingObjects { friend class ASTStmtReader; friend TrailingObjects; @@ -1614,7 +1620,8 @@ class CompoundStmt final SourceLocation RBraceLoc; CompoundStmt(ArrayRef Stmts, FPOptionsOverride FPFeatures, - SourceLocation LB, SourceLocation RB); + AtomicOptionsOverride AtomicOptions, SourceLocation LB, + SourceLocation RB); explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) {} void setStmts(ArrayRef Stmts); @@ -1625,13 +1632,24 @@ class CompoundStmt final *getTrailingObjects() = F; } + /// Set AtomicOptionsOverride in trailing storage. Used only by Serialization. + void setStoredAtomicOptions(AtomicOptionsOverride A) { + assert(hasStoredAtomicOptions()); + *getTrailingObjects() = A; + } + size_t numTrailingObjects(OverloadToken) const { return CompoundStmtBits.NumStmts; } + size_t numTrailingObjects(OverloadToken) const { + return CompoundStmtBits.HasFPFeatures; + } + public: static CompoundStmt *Create(const ASTContext &C, ArrayRef Stmts, - FPOptionsOverride FPFeatures, SourceLocation LB, + FPOptionsOverride FPFeatures, + AtomicOptionsOverride, SourceLocation LB, SourceLocation RB); // Build an empty compound statement with a location. 
@@ -1641,16 +1659,20 @@ class CompoundStmt final : Stmt(CompoundStmtClass), LBraceLoc(Loc), RBraceLoc(EndLoc) { CompoundStmtBits.NumStmts = 0; CompoundStmtBits.HasFPFeatures = 0; + CompoundStmtBits.HasAtomicOptions = 0; } // Build an empty compound statement. static CompoundStmt *CreateEmpty(const ASTContext &C, unsigned NumStmts, - bool HasFPFeatures); + bool HasFPFeatures, bool HasAtomicOptions); bool body_empty() const { return CompoundStmtBits.NumStmts == 0; } unsigned size() const { return CompoundStmtBits.NumStmts; } bool hasStoredFPFeatures() const { return CompoundStmtBits.HasFPFeatures; } + bool hasStoredAtomicOptions() const { + return CompoundStmtBits.HasAtomicOptions; + } /// Get FPOptionsOverride from trailing storage. FPOptionsOverride getStoredFPFeatures() const { @@ -1663,6 +1685,18 @@ class CompoundStmt final return hasStoredFPFeatures() ? getStoredFPFeatures() : FPOptionsOverride(); } + /// Get AtomicOptionsOverride from trailing storage. + AtomicOptionsOverride getStoredAtomicOptions() const { + assert(hasStoredAtomicOptions()); + return *getTrailingObjects(); + } + + /// Get the stored AtomicOptionsOverride or default if not stored. + AtomicOptionsOverride getStoredAtomicOptionsOrDefault() const { + return hasStoredAtomicOptions() ? 
getStoredAtomicOptions() + : AtomicOptionsOverride(); + } + using body_iterator = Stmt **; using body_range = llvm::iterator_range; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 39dd1f515c9eb..f9b47df8db70e 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -157,6 +157,7 @@ class TextNodeDumper const char *getCommandName(unsigned CommandID); void printFPOptions(FPOptionsOverride FPO); + void printAtomicOptions(AtomicOptionsOverride AO); void dumpAPValueChildren(const APValue &Value, QualType Ty, const APValue &(*IdxToChildFun)(const APValue &, diff --git a/clang/include/clang/Basic/AtomicOptions.def b/clang/include/clang/Basic/AtomicOptions.def new file mode 100644 index 0000000000000..4cf2dab581c8b --- /dev/null +++ b/clang/include/clang/Basic/AtomicOptions.def @@ -0,0 +1,19 @@ +//===--- AtomicOptions.def - Atomic Options database -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines the Atomic language options. Users of this file +// must define the OPTION macro to make use of this information. 
+#ifndef OPTION +# error Define the OPTION macro to handle atomic language options +#endif + +// OPTION(name, type, width, previousName) +OPTION(NoRemoteMemory, bool, 1, First) +OPTION(NoFineGrainedMemory, bool, 1, NoRemoteMemory) +OPTION(IgnoreDenormalMode, bool, 1, NoFineGrainedMemory) + +#undef OPTION \ No newline at end of file diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3d8240f8357b4..38f0a0365a830 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -301,6 +301,13 @@ def err_drv_invalid_int_value : Error<"invalid integral value '%1' in '%0'">; def err_drv_invalid_value_with_suggestion : Error< "invalid value '%1' in '%0', expected one of: %2">; def err_drv_alignment_not_power_of_two : Error<"alignment is not a power of 2 in '%0'">; + +def err_drv_invalid_atomic_option : Error< + "invalid argument '%0' to -fatomic=; must be a " + "comma-separated list of key:value pairs, where allowed keys are " + "'no_fine_grained_memory', 'no_remote_memory', 'ignore_denormal_mode', " + "and values are 'on' or 'off', and each key must be unique">; + def err_drv_invalid_remap_file : Error< "invalid option '%0' not of the form ;">; def err_drv_invalid_gcc_install_dir : Error<"'%0' does not contain a GCC installation">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index f8d50d12bb935..647d1e208472c 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1272,6 +1272,9 @@ def warn_pragma_init_seg_unsupported_target : Warning< def err_pragma_file_or_compound_scope : Error< "'#pragma %0' can only appear at file scope or at the start of a " "compound statement">; +// - #pragma restricted to start of compound statement +def err_pragma_compound_scope : Error< + "'#pragma %0' can only appear at the start 
of a compound statement">; // - #pragma stdc unknown def ext_stdc_pragma_ignored : ExtWarn<"unknown pragma in STDC namespace">, InGroup; @@ -1655,6 +1658,12 @@ def err_pragma_fp_invalid_argument : Error< "'ignore', 'maytrap' or 'strict'|" "'source', 'double' or 'extended'}2">; +def err_pragma_atomic_invalid_option : Error< + "%select{invalid|missing}0 option%select{ %1|}0; expected 'no_remote_memory', 'no_fine_grained_memory', or 'ignore_denormal_mode'">; + +def err_pragma_atomic_invalid_argument : Error< + "unexpected argument '%0' to '#pragma clang atomic %1'; expected 'on' or 'off'">; + def err_pragma_invalid_keyword : Error< "invalid argument; expected 'enable'%select{|, 'full'}0%select{|, 'assume_safety'}1 or 'disable'">; def err_pragma_pipeline_invalid_keyword : Error< diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 91f1c2f2e6239..617b0ed74603c 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -579,6 +579,10 @@ class LangOptions : public LangOptionsBase { // WebAssembly target. bool NoWasmOpt = false; + /// The default atomic codegen options specified by command line in the + /// format of key:{on|off}. 
+ std::vector AtomicOptionsAsWritten; + LangOptions(); /// Set language defaults for the given input language and @@ -1034,6 +1038,169 @@ inline void FPOptions::applyChanges(FPOptionsOverride FPO) { *this = FPO.applyOverrides(*this); } +/// Atomic control options +class AtomicOptionsOverride; +class AtomicOptions { +public: + using storage_type = uint16_t; + + static constexpr unsigned StorageBitSize = 8 * sizeof(storage_type); + + static constexpr storage_type FirstShift = 0, FirstWidth = 0; +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + static constexpr storage_type NAME##Shift = \ + PREVIOUS##Shift + PREVIOUS##Width; \ + static constexpr storage_type NAME##Width = WIDTH; \ + static constexpr storage_type NAME##Mask = ((1 << NAME##Width) - 1) \ + << NAME##Shift; +#include "clang/Basic/AtomicOptions.def" + + static constexpr storage_type TotalWidth = 0 +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) +WIDTH +#include "clang/Basic/AtomicOptions.def" + ; + static_assert(TotalWidth <= StorageBitSize, + "Too short type for AtomicOptions"); + +private: + storage_type Value; + + AtomicOptionsOverride getChangesSlow(const AtomicOptions &Base) const; + +public: + AtomicOptions() : Value(0) { + setNoRemoteMemory(false); + setNoFineGrainedMemory(false); + setIgnoreDenormalMode(false); + } + explicit AtomicOptions(const LangOptions &LO) { + Value = 0; +#if 0 + setNoRemoteMemory(LO.NoRemoteMemoryAccess); + setNoFineGrainedMemory(LO.NoFineGrainedMemoryAccess); + setIgnoreDenormalMode(LO.IgnoreDenormals); +#endif + } + + bool operator==(AtomicOptions other) const { return Value == other.Value; } + + /// Return the default value of AtomicOptions that's used when trailing + /// storage isn't required. 
+ static AtomicOptions defaultWithoutTrailingStorage(const LangOptions &LO); + + storage_type getAsOpaqueInt() const { return Value; } + static AtomicOptions getFromOpaqueInt(storage_type Value) { + AtomicOptions Opts; + Opts.Value = Value; + return Opts; + } + + /// Return difference with the given option set. + AtomicOptionsOverride getChangesFrom(const AtomicOptions &Base) const; + + void applyChanges(AtomicOptionsOverride AO); + +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + TYPE get##NAME() const { \ + return static_cast((Value & NAME##Mask) >> NAME##Shift); \ + } \ + void set##NAME(TYPE value) { \ + Value = (Value & ~NAME##Mask) | (storage_type(value) << NAME##Shift); \ + } +#include "clang/Basic/AtomicOptions.def" + LLVM_DUMP_METHOD void dump(); +}; + +/// Represents difference between two AtomicOptions values. +class AtomicOptionsOverride { + AtomicOptions Options = AtomicOptions::getFromOpaqueInt(0); + AtomicOptions::storage_type OverrideMask = 0; + +public: + /// The type suitable for storing values of AtomicOptionsOverride. Must be + /// twice as wide as bit size of AtomicOption. + using storage_type = uint32_t; + static_assert(sizeof(storage_type) >= 2 * sizeof(AtomicOptions::storage_type), + "Too short type for AtomicOptionsOverride"); + + /// Bit mask selecting bits of OverrideMask in serialized representation of + /// AtomicOptionsOverride. 
+ static constexpr storage_type OverrideMaskBits = + (static_cast(1) << AtomicOptions::StorageBitSize) - 1; + + AtomicOptionsOverride() {} + AtomicOptionsOverride(const LangOptions &LO); + AtomicOptionsOverride(AtomicOptions AO) + : Options(AO), OverrideMask(OverrideMaskBits) {} + AtomicOptionsOverride(AtomicOptions AO, AtomicOptions::storage_type Mask) + : Options(AO), OverrideMask(Mask) {} + + bool requiresTrailingStorage() const { return OverrideMask != 0; } + + storage_type getAsOpaqueInt() const { + return (static_cast(Options.getAsOpaqueInt()) + << AtomicOptions::StorageBitSize) | + OverrideMask; + } + + static AtomicOptionsOverride getFromOpaqueInt(storage_type I) { + AtomicOptionsOverride Opts; + Opts.OverrideMask = I & OverrideMaskBits; + Opts.Options = + AtomicOptions::getFromOpaqueInt(I >> AtomicOptions::StorageBitSize); + return Opts; + } + + AtomicOptions applyOverrides(AtomicOptions Base) { + AtomicOptions Result = AtomicOptions::getFromOpaqueInt( + (Base.getAsOpaqueInt() & ~OverrideMask) | + (Options.getAsOpaqueInt() & OverrideMask)); + return Result; + } + + AtomicOptions applyOverrides(const LangOptions &LO) { + return applyOverrides(AtomicOptions(LO)); + } + + bool operator==(AtomicOptionsOverride other) const { + return Options == other.Options && OverrideMask == other.OverrideMask; + } + bool operator!=(AtomicOptionsOverride other) const { + return !(*this == other); + } + +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + bool has##NAME##Override() const { \ + return OverrideMask & AtomicOptions::NAME##Mask; \ + } \ + TYPE get##NAME##Override() const { \ + assert(has##NAME##Override()); \ + return Options.get##NAME(); \ + } \ + void clear##NAME##Override() { \ + Options.set##NAME(TYPE(0)); \ + OverrideMask &= ~AtomicOptions::NAME##Mask; \ + } \ + void set##NAME##Override(TYPE value) { \ + Options.set##NAME(value); \ + OverrideMask |= AtomicOptions::NAME##Mask; \ + } +#include "clang/Basic/AtomicOptions.def" + + LLVM_DUMP_METHOD void dump(); +}; + 
+inline AtomicOptionsOverride +AtomicOptions::getChangesFrom(const AtomicOptions &Base) const { + if (Value == Base.Value) + return AtomicOptionsOverride(); + return getChangesSlow(Base); +} + +inline void AtomicOptions::applyChanges(AtomicOptionsOverride AO) { + *this = AO.applyOverrides(*this); +} + /// Describes the kind of translation unit being processed. enum TranslationUnitKind { /// The translation unit is a complete translation unit. diff --git a/clang/include/clang/Basic/PragmaKinds.h b/clang/include/clang/Basic/PragmaKinds.h index 42f049f7323d2..bec3140b0866b 100644 --- a/clang/include/clang/Basic/PragmaKinds.h +++ b/clang/include/clang/Basic/PragmaKinds.h @@ -42,6 +42,13 @@ enum PragmaFPKind { PFK_Exceptions, // #pragma clang fp exceptions PFK_EvalMethod // #pragma clang fp eval_method }; + +enum PragmaAtomicKind { + PAK_NoRemoteMemory, // #pragma clang atomic begin(no_remote_memory:on) + PAK_NoFineGrainedMemory, // #pragma clang atomic + // begin(no_fine_grained_memory:on) + PAK_IgnoreDenormalMode, // #pragma clang atomic begin(ignore_denormal_mode:on) +}; } #endif diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index a58fb5f979272..7a7fe1268dbf5 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -296,6 +296,9 @@ class TargetInfo : public TransferrableTargetInfo, // in function attributes in IR. llvm::StringSet<> ReadOnlyFeatures; + // Default atomic options + AtomicOptions AtomicOpts; + public: /// Construct a target for the given options. /// @@ -1680,6 +1683,9 @@ class TargetInfo : public TransferrableTargetInfo, return CC_C; } + /// Get the default atomic options. 
+ AtomicOptions getAtomicOpts() const { return AtomicOpts; } + enum CallingConvCheckResult { CCCR_OK, CCCR_Warning, diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 421dbb413fed9..b94aa8e2595a7 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -999,6 +999,8 @@ PRAGMA_ANNOTATION(pragma_loop_hint) PRAGMA_ANNOTATION(pragma_fp) +PRAGMA_ANNOTATION(pragma_atomic) + // Annotation for the attribute pragma directives - #pragma clang attribute ... PRAGMA_ANNOTATION(pragma_attribute) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e196c3dc5cb3b..902129fe59fd2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2303,6 +2303,14 @@ def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Visibility<[ClangOption, CC1Option]>, MarshallingInfoString>; +def fatomic_EQ : CommaJoined<["-"], "fatomic=">, Group, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Specify atomic codegen options as a comma-separated list of " + "key:value pairs, allowed keys and values are " + "no_fine_grained_memory:on|off, no_remote_memory:on|off, " + "ignore_denormal_mode:on|off">, + MarshallingInfoStringVector>; + defm memory_profile : OptInCC1FFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">; def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">, Group, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 39c5f588167ed..ec86ecc2e2cdb 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -213,6 +213,7 @@ class Parser : public CodeCompletionHandler { std::unique_ptr UnrollAndJamHintHandler; std::unique_ptr NoUnrollAndJamHintHandler; std::unique_ptr FPHandler; + std::unique_ptr AtomicHandler; std::unique_ptr STDCFenvAccessHandler; std::unique_ptr 
STDCFenvRoundHandler; std::unique_ptr STDCCXLIMITHandler; @@ -837,6 +838,10 @@ class Parser : public CodeCompletionHandler { /// #pragma clang fp ... void HandlePragmaFP(); + /// \brief Handle the annotation token produced for + /// #pragma clang atomic ... + void HandlePragmaAtomic(); + /// Handle the annotation token produced for /// #pragma OPENCL EXTENSION... void HandlePragmaOpenCLExtension(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b7bd6c2433efd..0c37ba7549e91 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1738,6 +1738,23 @@ class Sema final : public SemaBase { return result; } + // This stack tracks the current state of Sema.CurAtomicFeatures. + PragmaStack AtomicPragmaStack; + + AtomicOptionsOverride getCurAtomicOptionsOverrides() { + AtomicOptionsOverride Result; + if (!AtomicPragmaStack.hasValue()) { + Result = AtomicOptionsOverride(); + } else { + Result = AtomicPragmaStack.CurrentValue; + } + return Result; + } + + void setCurAtomicOptionsOverrides(AtomicOptionsOverride AO) { + AtomicPragmaStack.CurrentValue = AO; + } + enum PragmaSectionKind { PSK_DataSeg, PSK_BSSSeg, @@ -2038,6 +2055,11 @@ class Sema final : public SemaBase { /// Called to set constant rounding mode for floating point operations. void ActOnPragmaFEnvRound(SourceLocation Loc, llvm::RoundingMode); + /// Called on well formed + /// \#pragma clang atomic + void ActOnPragmaAtomicOption(SourceLocation Loc, PragmaAtomicKind Kind, + bool IsEnabled); + /// Called to set exception behavior for floating point operations. void setExceptionMode(SourceLocation Loc, LangOptions::FPExceptionModeKind); @@ -13539,8 +13561,8 @@ class Sema final : public SemaBase { SavedPendingLocalImplicitInstantiations; }; - /// Records and restores the CurFPFeatures state on entry/exit of compound - /// statements. + /// Records and restores the CurFPFeatures state on entry/exit + /// of compound statements. 
class FPFeaturesStateRAII { public: FPFeaturesStateRAII(Sema &S); @@ -13555,6 +13577,18 @@ class Sema final : public SemaBase { SourceLocation OldFPPragmaLocation; }; + /// Records and restores the AtomicOptions state on entry/exit + /// of compound statements. + class AtomicOptionsRAII { + public: + AtomicOptionsRAII(Sema &S_); + ~AtomicOptionsRAII(); + + private: + Sema &S; + AtomicOptionsOverride SavedAOO; + }; + class GlobalEagerInstantiationScope { public: GlobalEagerInstantiationScope(Sema &S, bool Enabled) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 198bc34a9f031..cf7b01f8540d7 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6913,7 +6913,10 @@ ExpectedStmt ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) { FPOptionsOverride FPO = S->hasStoredFPFeatures() ? S->getStoredFPFeatures() : FPOptionsOverride(); - return CompoundStmt::Create(Importer.getToContext(), ToStmts, FPO, + AtomicOptionsOverride AO = S->hasStoredAtomicOptions() + ? 
S->getStoredAtomicOptions() + : AtomicOptionsOverride(); + return CompoundStmt::Create(Importer.getToContext(), ToStmts, FPO, AO, *ToLBracLocOrErr, *ToRBracLocOrErr); } diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp index fe59d6070b3e8..025dd65102880 100644 --- a/clang/lib/AST/Stmt.cpp +++ b/clang/lib/AST/Stmt.cpp @@ -364,13 +364,17 @@ int64_t Stmt::getID(const ASTContext &Context) const { } CompoundStmt::CompoundStmt(ArrayRef Stmts, FPOptionsOverride FPFeatures, + AtomicOptionsOverride AtomicOptions, SourceLocation LB, SourceLocation RB) : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { CompoundStmtBits.NumStmts = Stmts.size(); CompoundStmtBits.HasFPFeatures = FPFeatures.requiresTrailingStorage(); + CompoundStmtBits.HasAtomicOptions = AtomicOptions.requiresTrailingStorage(); setStmts(Stmts); if (hasStoredFPFeatures()) setStoredFPFeatures(FPFeatures); + if (hasStoredAtomicOptions()) + setStoredAtomicOptions(AtomicOptions); } void CompoundStmt::setStmts(ArrayRef Stmts) { @@ -382,22 +386,27 @@ void CompoundStmt::setStmts(ArrayRef Stmts) { CompoundStmt *CompoundStmt::Create(const ASTContext &C, ArrayRef Stmts, FPOptionsOverride FPFeatures, + AtomicOptionsOverride AtomicOpts, SourceLocation LB, SourceLocation RB) { - void *Mem = - C.Allocate(totalSizeToAlloc( - Stmts.size(), FPFeatures.requiresTrailingStorage()), - alignof(CompoundStmt)); - return new (Mem) CompoundStmt(Stmts, FPFeatures, LB, RB); + void *Mem = C.Allocate( + totalSizeToAlloc( + Stmts.size(), FPFeatures.requiresTrailingStorage(), + AtomicOpts.requiresTrailingStorage()), + alignof(CompoundStmt)); + return new (Mem) CompoundStmt(Stmts, FPFeatures, AtomicOpts, LB, RB); } CompoundStmt *CompoundStmt::CreateEmpty(const ASTContext &C, unsigned NumStmts, - bool HasFPFeatures) { + bool HasFPFeatures, + bool HasAtomicOptions) { void *Mem = C.Allocate( - totalSizeToAlloc(NumStmts, HasFPFeatures), + totalSizeToAlloc( + NumStmts, HasFPFeatures, HasAtomicOptions), alignof(CompoundStmt)); 
CompoundStmt *New = new (Mem) CompoundStmt(EmptyShell()); New->CompoundStmtBits.NumStmts = NumStmts; New->CompoundStmtBits.HasFPFeatures = HasFPFeatures; + New->CompoundStmtBits.HasAtomicOptions = HasAtomicOptions; return New; } diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 014d02220d291..de2b1a8b6b757 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -130,6 +130,7 @@ namespace { void PrintOMPExecutableDirective(OMPExecutableDirective *S, bool ForceNoStmt = false); void PrintFPPragmas(CompoundStmt *S); + void PrintAtomicPragmas(CompoundStmt *S); void PrintExpr(Expr *E) { if (E) @@ -178,6 +179,7 @@ void StmtPrinter::PrintRawCompoundStmt(CompoundStmt *Node) { assert(Node && "Compound statement cannot be null"); OS << "{" << NL; PrintFPPragmas(Node); + PrintAtomicPragmas(Node); for (auto *I : Node->body()) PrintStmt(I); @@ -244,6 +246,27 @@ void StmtPrinter::PrintFPPragmas(CompoundStmt *S) { } } +void StmtPrinter::PrintAtomicPragmas(CompoundStmt *S) { + if (!S->hasStoredAtomicOptions()) + return; + AtomicOptionsOverride AO = S->getStoredAtomicOptions(); + + if (AO.hasNoRemoteMemoryOverride()) { + Indent() << "#pragma clang atomic no_remote_memory(" + << (AO.getNoRemoteMemoryOverride() ? "on" : "off") << ")\n"; + } + + if (AO.hasNoFineGrainedMemoryOverride()) { + Indent() << "#pragma clang atomic no_fine_grained_memory(" + << (AO.getNoFineGrainedMemoryOverride() ? "on" : "off") << ")\n"; + } + + if (AO.hasIgnoreDenormalModeOverride()) { + Indent() << "#pragma clang atomic ignore_denormal_mode(" + << (AO.getIgnoreDenormalModeOverride() ? 
"on" : "off") << ")\n"; + } +} + void StmtPrinter::PrintRawDecl(Decl *D) { D->print(OS, Policy, IndentLevel); } diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 388c927c9aa55..249aed3320270 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -990,6 +990,13 @@ void TextNodeDumper::printFPOptions(FPOptionsOverride FPO) { #include "clang/Basic/FPOptions.def" } +void TextNodeDumper::printAtomicOptions(AtomicOptionsOverride AO) { +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + if (AO.has##NAME##Override()) \ + OS << " Atomic" #NAME "=" << AO.get##NAME##Override(); +#include "clang/Basic/AtomicOptions.def" +} + void TextNodeDumper::visitTextComment(const comments::TextComment *C, const comments::FullComment *) { OS << " Text=\"" << C->getText() << "\""; @@ -2867,6 +2874,8 @@ void TextNodeDumper::VisitCompoundStmt(const CompoundStmt *S) { VisitStmt(S); if (S->hasStoredFPFeatures()) printFPOptions(S->getStoredFPFeatures()); + if (S->hasStoredAtomicOptions()) + printAtomicOptions(S->getStoredAtomicOptions()); } void TextNodeDumper::VisitHLSLBufferDecl(const HLSLBufferDecl *D) { diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp index 127e843d4ead2..90d9758ab4c8b 100644 --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -135,7 +135,8 @@ BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS, } CompoundStmt *ASTMaker::makeCompound(ArrayRef Stmts) { - return CompoundStmt::Create(C, Stmts, FPOptionsOverride(), SourceLocation(), + return CompoundStmt::Create(C, Stmts, FPOptionsOverride(), + AtomicOptionsOverride(), SourceLocation(), SourceLocation()); } diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 9331a63d91b17..8cc71645d4ea9 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -238,3 +238,55 @@ LLVM_DUMP_METHOD void FPOptionsOverride::dump() { #include 
"clang/Basic/FPOptions.def" llvm::errs() << "\n"; } + +AtomicOptions +AtomicOptions::defaultWithoutTrailingStorage(const LangOptions &LO) { + AtomicOptions result(LO); + return result; +} + +AtomicOptionsOverride +AtomicOptions::getChangesSlow(const AtomicOptions &Base) const { + AtomicOptions::storage_type OverrideMask = 0; +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + if (get##NAME() != Base.get##NAME()) \ + OverrideMask |= NAME##Mask; +#include "clang/Basic/AtomicOptions.def" + return AtomicOptionsOverride(*this, OverrideMask); +} + +LLVM_DUMP_METHOD void AtomicOptions::dump() { +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + llvm::errs() << "\n " #NAME " " << get##NAME(); +#include "clang/Basic/AtomicOptions.def" + llvm::errs() << "\n"; +} + +LLVM_DUMP_METHOD void AtomicOptionsOverride::dump() { +#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ + if (has##NAME##Override()) \ + llvm::errs() << "\n " #NAME " Override is " << get##NAME##Override(); +#include "clang/Basic/AtomicOptions.def" + llvm::errs() << "\n"; +} + +AtomicOptionsOverride::AtomicOptionsOverride(const LangOptions &LO) { + for (const auto &Setting : LO.AtomicOptionsAsWritten) { + SmallVector KeyValue; + StringRef(Setting).split(KeyValue, ":"); + // Assuming option string has been checked elsewhere and is valid. 
+ assert(KeyValue.size() == 2 && "Invalid atomic option format"); + StringRef Key = KeyValue[0]; + StringRef Val = KeyValue[1]; + bool IsEnabled = (Val == "on"); + + if (Key == "no_fine_grained_memory") + setNoFineGrainedMemoryOverride(IsEnabled); + else if (Key == "no_remote_memory") + setNoRemoteMemoryOverride(IsEnabled); + else if (Key == "ignore_denormal_mode") + setIgnoreDenormalModeOverride(IsEnabled); + else + assert(false && "Unknown atomic option key"); + } +} diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 3b748d0249d57..0d7fdc3e4c799 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -241,6 +241,11 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64; AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; + // Set the default atomic options + AtomicOpts.setNoRemoteMemory(true); + AtomicOpts.setNoFineGrainedMemory(true); + AtomicOpts.setIgnoreDenormalMode(Opts.AllowAMDGPUUnsafeFPAtomics); + // Set pointer width and alignment for the generic address space. PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default); if (getMaxPointerWidth() == 64) { @@ -264,6 +269,8 @@ void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { // can be removed from the following line. 
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || !isAMDGCN(getTriple())); + + AtomicOpts.applyChanges(AtomicOptionsOverride(Opts)); } ArrayRef AMDGPUTargetInfo::getTargetBuiltins() const { diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index a8a70186c2c5a..ef1e6835014b4 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -348,7 +348,8 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *Catch = new (CGF.getContext()) CXXCatchStmt(Loc, /*exDecl=*/nullptr, Coro.ExceptionHandler); auto *TryBody = CompoundStmt::Create(CGF.getContext(), S.getResumeExpr(), - FPOptionsOverride(), Loc, Loc); + FPOptionsOverride(), + AtomicOptionsOverride(), Loc, Loc); TryStmt = CXXTryStmt::Create(CGF.getContext(), Loc, TryBody, Catch); CGF.EnterCXXTryStmt(*TryStmt); CGF.EmitStmt(TryBody); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 68386957bc2d9..afc17f0efdcfe 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -518,6 +518,8 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, bool GetLast, AggValueSlot AggSlot) { + CGAtomicOptionsRAII AORAII(CGM, S.getStoredAtomicOptionsOrDefault()); + const Stmt *ExprResult = S.getStmtExprResult(); assert((!GetLast || (GetLast && ExprResult)) && "If GetLast is true then the CompoundStmt must have a StmtExprResult"); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 1c0a0e117e560..b0cf35867ff41 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -810,6 +810,23 @@ class CodeGenFunction : public CodeGenTypeCache { }; FPOptions CurFPFeatures; + class CGAtomicOptionsRAII { + public: + CGAtomicOptionsRAII(CodeGenModule &CGM_, AtomicOptions AO) + : CGM(CGM_), SavedAtomicOpts(CGM.getAtomicOpts()) { + CGM.setAtomicOpts(AO); + } + CGAtomicOptionsRAII(CodeGenModule &CGM_, 
AtomicOptionsOverride AOO) + : CGM(CGM_), SavedAtomicOpts(CGM.getAtomicOpts()) { + CGM.setAtomicOpts(AOO.applyOverrides(SavedAtomicOpts)); + } + ~CGAtomicOptionsRAII() { CGM.setAtomicOpts(SavedAtomicOpts); } + + private: + CodeGenModule &CGM; + AtomicOptions SavedAtomicOpts; + }; + public: /// ObjCEHValueStack - Stack of Objective-C exception values, used for /// rethrows. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9aaf90ccfe04f..75d34291d3401 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -342,7 +342,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), VMContext(M.getContext()), Types(*this), VTables(*this), - SanitizerMD(new SanitizerMetadata(*this)) { + SanitizerMD(new SanitizerMetadata(*this)), + AtomicOpts(Target.getAtomicOpts()) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 284bba823baeb..1fd1e20768ce5 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -627,6 +627,8 @@ class CodeGenModule : public CodeGenTypeCache { std::optional computeVTPointerAuthentication(const CXXRecordDecl *ThisClass); + AtomicOptions AtomicOpts; + public: CodeGenModule(ASTContext &C, IntrusiveRefCntPtr FS, const HeaderSearchOptions &headersearchopts, @@ -642,6 +644,12 @@ class CodeGenModule : public CodeGenTypeCache { /// Finalize LLVM code generation. void Release(); + /// Get the current Atomic options. + AtomicOptions getAtomicOpts() { return AtomicOpts; } + + /// Set the current Atomic options. + void setAtomicOpts(AtomicOptions AO) { AtomicOpts = AO; } + /// Return true if we should emit location information for expressions. 
bool getExpressionLocationsEnabled() const; diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 37e6af3d4196a..91f5df22b4892 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -550,19 +550,16 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata( CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const { - if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics()) - return; - - // TODO: Introduce new, more controlled options that also work for integers, - // and deprecate allowAMDGPUUnsafeFPAtomics. - llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation(); - if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) { - llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {}); + AtomicOptions AO = CGF.CGM.getAtomicOpts(); + llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {}); + if (AO.getNoFineGrainedMemory()) RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty); - - if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy()) - RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty); - } + if (AO.getNoRemoteMemory()) + RMW.setMetadata("amdgpu.no.remote.memory", Empty); + if (AO.getIgnoreDenormalMode() && + RMW.getOperation() == llvm::AtomicRMWInst::FAdd && + RMW.getType()->isFloatTy()) + RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty); } bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c698d38b80e57..e6a39077e7c69 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5881,6 +5881,32 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA); + if (Arg *AtomicArg = Args.getLastArg(options::OPT_fatomic_EQ)) { + if (!AtomicArg->getNumValues()) { + 
D.Diag(clang::diag::warn_drv_empty_joined_argument) + << AtomicArg->getAsString(Args); + } else { + bool Valid = true; + std::set Keys; + for (StringRef Option : AtomicArg->getValues()) { + SmallVector KeyValue; + Option.split(KeyValue, ":"); + if (KeyValue.size() != 2 || + (KeyValue[1] != "on" && KeyValue[1] != "off") || + (KeyValue[0] != "no_fine_grained_memory" && + KeyValue[0] != "no_remote_memory" && + KeyValue[0] != "ignore_denormal_mode") || + !Keys.insert(KeyValue[0]).second) { + Valid = false; + D.Diag(diag::err_drv_invalid_atomic_option) << Option; + break; + } + } + if (Valid) + CmdArgs.push_back(Args.MakeArgString(AtomicArg->getAsString(Args))); + } + } + if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) { const llvm::Triple::ArchType Arch = TC.getArch(); if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) { diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index aef4ddb758816..107eec273500a 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -183,6 +183,12 @@ struct PragmaFPHandler : public PragmaHandler { Token &FirstToken) override; }; +struct PragmaAtomicHandler : public PragmaHandler { + PragmaAtomicHandler() : PragmaHandler("atomic") {} + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &FirstToken) override; +}; + // A pragma handler to be the base of the NoOpenMPHandler and NoOpenACCHandler, // which are identical other than the name given to them, and the diagnostic // emitted. 
@@ -568,6 +574,9 @@ void Parser::initializePragmaHandlers() { FPHandler = std::make_unique(); PP.AddPragmaHandler("clang", FPHandler.get()); + AtomicHandler = std::make_unique(); + PP.AddPragmaHandler("clang", AtomicHandler.get()); + AttributePragmaHandler = std::make_unique(AttrFactory); PP.AddPragmaHandler("clang", AttributePragmaHandler.get()); @@ -709,6 +718,9 @@ void Parser::resetPragmaHandlers() { PP.RemovePragmaHandler("clang", FPHandler.get()); FPHandler.reset(); + PP.RemovePragmaHandler("clang", AtomicHandler.get()); + AtomicHandler.reset(); + PP.RemovePragmaHandler("clang", AttributePragmaHandler.get()); AttributePragmaHandler.reset(); @@ -3533,6 +3545,141 @@ void Parser::HandlePragmaFP() { ConsumeAnnotationToken(); } +struct TokAtomicAnnotValue { + std::optional NoRemoteMemoryValue; + std::optional NoFineGrainedMemoryValue; + std::optional IgnoreDenormalModeValue; +}; + +void PragmaAtomicHandler::HandlePragma(Preprocessor &PP, + PragmaIntroducer Introducer, + Token &Tok) { + Token PragmaName = Tok; + SmallVector TokenList; + + PP.Lex(Tok); + if (Tok.isNot(tok::identifier)) { + PP.Diag(Tok.getLocation(), diag::err_pragma_atomic_invalid_option) + << /*MissingOption=*/true << ""; + return; + } + + auto *AnnotValue = new (PP.getPreprocessorAllocator()) TokAtomicAnnotValue; + int OptionCount = 0; + + while (Tok.is(tok::identifier) && OptionCount < 3) { + IdentifierInfo *OptionInfo = Tok.getIdentifierInfo(); + + auto OptionKind = + llvm::StringSwitch>( + OptionInfo->getName()) + .Case("no_remote_memory", PAK_NoRemoteMemory) + .Case("no_fine_grained_memory", PAK_NoFineGrainedMemory) + .Case("ignore_denormal_mode", PAK_IgnoreDenormalMode) + .Default(std::nullopt); + + if (!OptionKind) { + PP.Diag(Tok.getLocation(), diag::err_pragma_atomic_invalid_option) + << /*MissingOption=*/false << OptionInfo; + return; + } + + PP.Lex(Tok); + + // Read '(' + if (Tok.isNot(tok::l_paren)) { + PP.Diag(Tok.getLocation(), diag::err_expected) << tok::l_paren; + return; + } + 
PP.Lex(Tok); + + if (Tok.isNot(tok::identifier)) { + PP.Diag(Tok.getLocation(), diag::err_pragma_atomic_invalid_argument) + << PP.getSpelling(Tok) << OptionInfo->getName(); + return; + } + + const IdentifierInfo *II = Tok.getIdentifierInfo(); + bool Value = llvm::StringSwitch(II->getName()) + .Case("on", true) + .Case("off", false) + .Default(false); + + switch (*OptionKind) { + case PAK_NoRemoteMemory: + AnnotValue->NoRemoteMemoryValue = Value; + break; + case PAK_NoFineGrainedMemory: + AnnotValue->NoFineGrainedMemoryValue = Value; + break; + case PAK_IgnoreDenormalMode: + AnnotValue->IgnoreDenormalModeValue = Value; + break; + } + + PP.Lex(Tok); + + // Read ')' + if (Tok.isNot(tok::r_paren)) { + PP.Diag(Tok.getLocation(), diag::err_expected) << tok::r_paren; + return; + } + PP.Lex(Tok); + + OptionCount++; + } + + if (Tok.isNot(tok::eod)) { + PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) + << "clang atomic"; + return; + } + + Token AtomicTok; + AtomicTok.startToken(); + AtomicTok.setKind(tok::annot_pragma_atomic); + AtomicTok.setLocation(PragmaName.getLocation()); + AtomicTok.setAnnotationEndLoc(PragmaName.getLocation()); + AtomicTok.setAnnotationValue(reinterpret_cast(AnnotValue)); + TokenList.push_back(AtomicTok); + + auto TokenArray = std::make_unique(TokenList.size()); + std::copy(TokenList.begin(), TokenList.end(), TokenArray.get()); + + PP.EnterTokenStream(std::move(TokenArray), TokenList.size(), + /*DisableMacroExpansion=*/false, /*IsReinject=*/false); +} + +void Parser::HandlePragmaAtomic() { + assert(Tok.is(tok::annot_pragma_atomic)); + + if (!getCurScope()->isCompoundStmtScope()) { + Diag(Tok.getLocation(), diag::err_pragma_compound_scope) << "clang atomic"; + ConsumeAnnotationToken(); + return; + } + + auto *AnnotValue = + reinterpret_cast(Tok.getAnnotationValue()); + + if (AnnotValue->NoRemoteMemoryValue) { + Actions.ActOnPragmaAtomicOption(Tok.getLocation(), PAK_NoRemoteMemory, + *AnnotValue->NoRemoteMemoryValue); + } + + if 
(AnnotValue->NoFineGrainedMemoryValue) { + Actions.ActOnPragmaAtomicOption(Tok.getLocation(), PAK_NoFineGrainedMemory, + *AnnotValue->NoFineGrainedMemoryValue); + } + + if (AnnotValue->IgnoreDenormalModeValue) { + Actions.ActOnPragmaAtomicOption(Tok.getLocation(), PAK_IgnoreDenormalMode, + *AnnotValue->IgnoreDenormalModeValue); + } + + ConsumeAnnotationToken(); +} + /// Parses loop or unroll pragma hint value and fills in Info. static bool ParseLoopHintValue(Preprocessor &PP, Token &Tok, Token PragmaName, Token Option, bool ValueInParens, diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index bdb3fc051d0b3..7f6ac0eeb8ffc 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -483,6 +483,13 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( ConsumeAnnotationToken(); return StmtError(); + case tok::annot_pragma_atomic: + ProhibitAttributes(CXX11Attrs); + ProhibitAttributes(GNUAttrs); + Diag(Tok, diag::err_pragma_compound_scope) << "clang atomic"; + ConsumeAnnotationToken(); + return StmtError(); + case tok::annot_pragma_opencl_extension: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); @@ -1095,6 +1102,9 @@ void Parser::ParseCompoundStatementLeadingPragmas() { case tok::annot_pragma_fenv_round: HandlePragmaFEnvRound(); break; + case tok::annot_pragma_atomic: + HandlePragmaAtomic(); + break; case tok::annot_pragma_cx_limited_range: HandlePragmaCXLimitedRange(); break; @@ -1194,6 +1204,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { // Record the current FPFeatures, restore on leaving the // compound statement. 
Sema::FPFeaturesStateRAII SaveFPFeatures(Actions); + Sema::AtomicOptionsRAII SaveAtomicOpts(Actions); InMessageExpressionRAIIObject InMessage(*this, false); BalancedDelimiterTracker T(*this, tok::l_brace); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 5ebe71e496a2e..32fae90a1da2a 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -861,6 +861,9 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, case tok::annot_pragma_fp: HandlePragmaFP(); break; + case tok::annot_pragma_atomic: + HandlePragmaAtomic(); + break; case tok::annot_pragma_opencl_extension: HandlePragmaOpenCLExtension(); return nullptr; diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 6ee90d15d7a6d..d1a3670c31a20 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -197,9 +197,9 @@ struct BuiltinTypeDeclBuilder { AST, Handle, Call, BO_Assign, Handle->getType(), VK_LValue, OK_Ordinary, SourceLocation(), FPOptionsOverride()); - Constructor->setBody( - CompoundStmt::Create(AST, {Assign}, FPOptionsOverride(), - SourceLocation(), SourceLocation())); + Constructor->setBody(CompoundStmt::Create( + AST, {Assign}, FPOptionsOverride(), AtomicOptionsOverride(), + SourceLocation(), SourceLocation())); Constructor->setAccess(AccessSpecifier::AS_public); Record->addDecl(Constructor); return *this; @@ -279,9 +279,9 @@ struct BuiltinTypeDeclBuilder { auto *Return = ReturnStmt::Create(AST, SourceLocation(), Array, nullptr); - MethodDecl->setBody(CompoundStmt::Create(AST, {Return}, FPOptionsOverride(), - SourceLocation(), - SourceLocation())); + MethodDecl->setBody(CompoundStmt::Create( + AST, {Return}, FPOptionsOverride(), AtomicOptionsOverride(), + SourceLocation(), SourceLocation())); MethodDecl->setLexicalDeclContext(Record); MethodDecl->setAccess(AccessSpecifier::AS_public); MethodDecl->addAttr(AlwaysInlineAttr::CreateImplicit( diff --git 
a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 19d8692ee6484..d02b201793b16 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -253,7 +253,8 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, AlignPackStack(AlignPackInfo(getLangOpts().XLPragmaPack)), DataSegStack(nullptr), BSSSegStack(nullptr), ConstSegStack(nullptr), CodeSegStack(nullptr), StrictGuardStackCheckStack(false), - FpPragmaStack(FPOptionsOverride()), CurInitSeg(nullptr), + FpPragmaStack(FPOptionsOverride()), + AtomicPragmaStack(AtomicOptionsOverride()), CurInitSeg(nullptr), VisContext(nullptr), PragmaAttributeCurrentTargetDecl(nullptr), StdCoroutineTraitsCache(nullptr), IdResolver(pp), OriginalLexicalContext(nullptr), StdInitializerList(nullptr), @@ -2746,6 +2747,13 @@ Sema::FPFeaturesStateRAII::~FPFeaturesStateRAII() { S.PP.setCurrentFPEvalMethod(OldFPPragmaLocation, OldEvalMethod); } +Sema::AtomicOptionsRAII::AtomicOptionsRAII(Sema &S_) + : S(S_), SavedAOO(S.getCurAtomicOptionsOverrides()) {} + +Sema::AtomicOptionsRAII::~AtomicOptionsRAII() { + S.setCurAtomicOptionsOverrides(SavedAOO); +} + bool Sema::isDeclaratorFunctionLike(Declarator &D) { assert(D.getCXXScopeSpec().isSet() && "can only be called for qualified names"); diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index b0c239678d0b0..37363d7cc36cd 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -1348,6 +1348,24 @@ void Sema::ActOnPragmaFEnvAccess(SourceLocation Loc, bool IsEnabled) { CurFPFeatures = NewFPFeatures.applyOverrides(getLangOpts()); } +void Sema::ActOnPragmaAtomicOption(SourceLocation Loc, PragmaAtomicKind Kind, + bool IsEnabled) { + AtomicOptionsOverride NewAtomicOptions = getCurAtomicOptionsOverrides(); + switch (Kind) { + case PAK_NoRemoteMemory: + NewAtomicOptions.setNoRemoteMemoryOverride(IsEnabled); + break; + case PAK_NoFineGrainedMemory: + NewAtomicOptions.setNoFineGrainedMemoryOverride(IsEnabled); + break; + case 
PAK_IgnoreDenormalMode: + NewAtomicOptions.setIgnoreDenormalModeOverride(IsEnabled); + break; + } + + AtomicPragmaStack.Act(Loc, PSK_Set, StringRef(), NewAtomicOptions); +} + void Sema::ActOnPragmaCXLimitedRange(SourceLocation Loc, LangOptions::ComplexRangeKind Range) { FPOptionsOverride NewFPFeatures = CurFPFeatureOverrides(); diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 1bb8955f6f879..ace869af0c763 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1159,7 +1159,8 @@ static CompoundStmt *buildCoroutineBody(Stmt *Body, ASTContext &Context) { // statement for consistency. assert(isa(Body) && "Unimaged coroutine body type"); return CompoundStmt::Create(Context, {Body}, FPOptionsOverride(), - SourceLocation(), SourceLocation()); + AtomicOptionsOverride(), SourceLocation(), + SourceLocation()); } CoroutineStmtBuilder::CoroutineStmtBuilder(Sema &S, FunctionDecl &FD, diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index b07e555afcacc..9a94f84fee45a 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -15800,6 +15800,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( assert(FunctionRef && "Can't refer to __invoke function?"); Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); Conv->setBody(CompoundStmt::Create(Context, Return, FPOptionsOverride(), + AtomicOptionsOverride(), Conv->getLocation(), Conv->getLocation())); Conv->markUsed(Context); Conv->setReferenced(); @@ -15852,6 +15853,7 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( // Set the body of the conversion function. 
Stmt *ReturnS = Return.get(); Conv->setBody(CompoundStmt::Create(Context, ReturnS, FPOptionsOverride(), + AtomicOptionsOverride(), Conv->getLocation(), Conv->getLocation())); Conv->markUsed(Context); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 124435330ca10..519ba9278965f 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -7718,9 +7718,9 @@ Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) { // a StmtExpr; currently this is only used for asm statements. // This is hacky, either create a new CXXStmtWithTemporaries statement or // a new AsmStmtWithTemporaries. - CompoundStmt *CompStmt = - CompoundStmt::Create(Context, SubStmt, FPOptionsOverride(), - SourceLocation(), SourceLocation()); + CompoundStmt *CompStmt = CompoundStmt::Create( + Context, SubStmt, FPOptionsOverride(), AtomicOptionsOverride(), + SourceLocation(), SourceLocation()); Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(), SourceLocation(), /*FIXME TemplateDepth=*/0); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b5978ddde2465..5ffe551c59310 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9482,7 +9482,8 @@ static Stmt *buildPreInits(ASTContext &Context, ArrayRef PreInits) { SmallVector Stmts; for (Stmt *S : PreInits) appendFlattenedStmtList(Stmts, S); - return CompoundStmt::Create(Context, PreInits, FPOptionsOverride(), {}, {}); + return CompoundStmt::Create(Context, PreInits, FPOptionsOverride(), + AtomicOptionsOverride(), {}, {}); } /// Build postupdate expression for the given list of postupdates expressions. 
@@ -14295,7 +14296,8 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, BodyParts.push_back(SourceCXXFor->getLoopVarStmt()); BodyParts.push_back(Inner); Inner = CompoundStmt::Create(Context, BodyParts, FPOptionsOverride(), - Inner->getBeginLoc(), Inner->getEndLoc()); + AtomicOptionsOverride(), Inner->getBeginLoc(), + Inner->getEndLoc()); Inner = new (Context) ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrStmt.get(), Inner, LoopHelper.Init->getBeginLoc(), @@ -14574,9 +14576,9 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, if (auto *CXXRangeFor = dyn_cast(LoopStmt)) InnerBodyStmts.push_back(CXXRangeFor->getLoopVarStmt()); InnerBodyStmts.push_back(Body); - CompoundStmt *InnerBody = - CompoundStmt::Create(getASTContext(), InnerBodyStmts, FPOptionsOverride(), - Body->getBeginLoc(), Body->getEndLoc()); + CompoundStmt *InnerBody = CompoundStmt::Create( + getASTContext(), InnerBodyStmts, FPOptionsOverride(), + AtomicOptionsOverride(), Body->getBeginLoc(), Body->getEndLoc()); ForStmt *InnerFor = new (Context) ForStmt(Context, InnerInit.get(), InnerCond.get(), nullptr, InnerIncr.get(), InnerBody, LoopHelper.Init->getBeginLoc(), @@ -14808,9 +14810,9 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, if (auto *CXXRangeFor = dyn_cast(LoopStmt)) BodyStmts.push_back(CXXRangeFor->getLoopVarStmt()); BodyStmts.push_back(Body); - auto *ReversedBody = - CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), - Body->getBeginLoc(), Body->getEndLoc()); + auto *ReversedBody = CompoundStmt::Create( + Context, BodyStmts, FPOptionsOverride(), AtomicOptionsOverride(), + Body->getBeginLoc(), Body->getEndLoc()); // Finally create the reversed For-statement. 
auto *ReversedFor = new (Context) @@ -14962,7 +14964,8 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( BodyParts.push_back(SourceCXXFor->getLoopVarStmt()); BodyParts.push_back(Inner); Inner = CompoundStmt::Create(Context, BodyParts, FPOptionsOverride(), - Inner->getBeginLoc(), Inner->getEndLoc()); + AtomicOptionsOverride(), Inner->getBeginLoc(), + Inner->getEndLoc()); Inner = new (Context) ForStmt( Context, InitStmt.get(), CondExpr.get(), nullptr, IncrStmt.get(), Inner, SourceHelper.Init->getBeginLoc(), SourceHelper.Init->getBeginLoc(), diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index d283eaa511011..52ab8df4ab814 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -457,7 +457,8 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R, : getCurCompoundScope().InitialFPFeatures; FPOptionsOverride FPDiff = getCurFPFeatures().getChangesFrom(FPO); - return CompoundStmt::Create(Context, Elts, FPDiff, L, R); + return CompoundStmt::Create(Context, Elts, FPDiff, + getCurAtomicOptionsOverrides(), L, R); } ExprResult diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 8f6f30434af65..e964272d10813 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -7852,9 +7852,12 @@ TreeTransform::TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr) { Sema::CompoundScopeRAII CompoundScope(getSema()); Sema::FPFeaturesStateRAII FPSave(getSema()); + Sema::AtomicOptionsRAII SaveAtomicOpts(getSema()); + if (S->hasStoredFPFeatures()) getSema().resetFPOptions( S->getStoredFPFeatures().applyOverrides(getSema().getLangOpts())); + getSema().setCurAtomicOptionsOverrides(S->getStoredAtomicOptionsOrDefault()); const Stmt *ExprResult = S->getStmtExprResult(); bool SubStmtInvalid = false; diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index a33f2a41a6549..982aa41a3c2a9 100644 --- 
a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2939,7 +2939,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case STMT_COMPOUND: { unsigned NumStmts = Record[ASTStmtReader::NumStmtFields]; bool HasFPFeatures = Record[ASTStmtReader::NumStmtFields + 1]; - S = CompoundStmt::CreateEmpty(Context, NumStmts, HasFPFeatures); + // TODO: Add serialization of atomic options + S = CompoundStmt::CreateEmpty(Context, NumStmts, HasFPFeatures, + /*HasAtomicOptions=*/false); break; } diff --git a/clang/test/AST/ast-dump-atomic-options.hip b/clang/test/AST/ast-dump-atomic-options.hip new file mode 100644 index 0000000000000..801f106b7d0a5 --- /dev/null +++ b/clang/test/AST/ast-dump-atomic-options.hip @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -ast-dump -fcuda-is-device %s | FileCheck %s +// RUN: %clang_cc1 -ast-dump -fcuda-is-device %s \ +// RUN: -fatomic=no_fine_grained_memory:off,no_remote_memory:on,ignore_denormal_mode:on \ +// RUN: | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK-LABEL: FunctionDecl {{.*}} test_default +// CHECK: | |-CompoundStmt +// CHECK-NOT: AtomicNoRemoteMemory +// CHECK-NOT: AtomicNoFineGrainedMemory +// CHECK-NOT: AtomicIgnoreDenormalMode +__device__ __host__ void test_default(float *a) { + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: FunctionDecl {{.*}} test_one +// CHECK: | |-CompoundStmt {{.*}} AtomicNoRemoteMemory=1 +// CHECK-NOT: AtomicNoFineGrainedMemory +// CHECK-NOT: AtomicIgnoreDenormalMode +__device__ __host__ void test_one(float *a) { + #pragma clang atomic no_remote_memory(on) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: FunctionDecl {{.*}} test_two +// CHECK: | |-CompoundStmt {{.*}} AtomicNoRemoteMemory=0 AtomicIgnoreDenormalMode=1 +// CHECK-NOT: AtomicNoFineGrainedMemory +__device__ __host__ void test_two(float *a) { + #pragma clang 
atomic no_remote_memory(off) ignore_denormal_mode(on) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: FunctionDecl {{.*}} test_three +// CHECK: | |-CompoundStmt {{.*}} AtomicNoRemoteMemory=1 AtomicNoFineGrainedMemory=0 AtomicIgnoreDenormalMode=0 +__device__ __host__ void test_three(float *a) { + #pragma clang atomic no_remote_memory(on) no_fine_grained_memory(off) ignore_denormal_mode(off) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: FunctionDecl {{.*}} test_nested +// CHECK: | |-CompoundStmt +// CHECK-NOT: AtomicNoRemoteMemory +// CHECK-NOT: AtomicNoFineGrainedMemory +// CHECK-NOT: AtomicIgnoreDenormalMode +// CHECK: | | `-CompoundStmt {{.*}} AtomicNoRemoteMemory=0 AtomicNoFineGrainedMemory=0 AtomicIgnoreDenormalMode=0 +// CHECK: | | |-CompoundStmt {{.*}} AtomicNoRemoteMemory=1 AtomicNoFineGrainedMemory=0 AtomicIgnoreDenormalMode=0 +// CHECK: | | `-CompoundStmt {{.*}} AtomicNoRemoteMemory=0 AtomicNoFineGrainedMemory=1 AtomicIgnoreDenormalMode=0 +__device__ __host__ void test_nested(float *a) { + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); + { + #pragma clang atomic no_remote_memory(off) no_fine_grained_memory(off) ignore_denormal_mode(off) + __scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE); + { + #pragma clang atomic no_remote_memory(on) + __scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP); + } + { + #pragma clang atomic no_fine_grained_memory(on) + __scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT); + } + } +} + +// CHECK-LABEL: FunctionTemplateDecl {{.*}} test_template +// CHECK-LABEL: FunctionDecl {{.*}} test_template 'void (T *)' +// CHECK: | |-CompoundStmt {{.*}} AtomicNoRemoteMemory=1 AtomicNoFineGrainedMemory=0 AtomicIgnoreDenormalMode=0 +// CHECK-LABEL: FunctionDecl {{.*}} used test_template 'void (float *)' implicit_instantiation +// CHECK: | |-CompoundStmt 
{{.*}} AtomicNoRemoteMemory=1 AtomicNoFineGrainedMemory=0 AtomicIgnoreDenormalMode=0 +template +__device__ __host__ void test_template(T *a) { + #pragma clang atomic no_remote_memory(on) no_fine_grained_memory(off) ignore_denormal_mode(off) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +__device__ __host__ void test_template_caller() { + float *p; + test_template(p); +} diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c index 6deff1116e1d8..718e8d8b2087d 100644 --- a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c +++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c @@ -2,21 +2,13 @@ // RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,SAFE %s // RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -munsafe-fp-atomics -o - %s | FileCheck -check-prefixes=CHECK,UNSAFE %s -// SAFE-LABEL: define dso_local float @test_float_post_inc( -// SAFE-SAME: ) #[[ATTR0:[0-9]+]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4 -// SAFE-NEXT: ret float [[TMP0]] -// -// UNSAFE-LABEL: define dso_local float @test_float_post_inc( -// UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory 
[[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]] -// UNSAFE-NEXT: ret float [[TMP0]] +// CHECK-LABEL: define dso_local float @test_float_post_inc( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// CHECK-NEXT: ret float [[TMP0]] // float test_float_post_inc() { @@ -24,21 +16,13 @@ float test_float_post_inc() return n++; } -// SAFE-LABEL: define dso_local float @test_float_post_dc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4 -// SAFE-NEXT: ret float [[TMP0]] -// -// UNSAFE-LABEL: define dso_local float @test_float_post_dc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]] -// UNSAFE-NEXT: ret float [[TMP0]] +// CHECK-LABEL: define dso_local float @test_float_post_dc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) 
@test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// CHECK-NEXT: ret float [[TMP0]] // float test_float_post_dc() { @@ -46,23 +30,14 @@ float test_float_post_dc() return n--; } -// SAFE-LABEL: define dso_local float @test_float_pre_dc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4 -// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// SAFE-NEXT: ret float [[TMP1]] -// -// UNSAFE-LABEL: define dso_local float @test_float_pre_dc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]] -// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// UNSAFE-NEXT: ret float [[TMP1]] +// CHECK-LABEL: define dso_local float @test_float_pre_dc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// CHECK-NEXT: ret float [[TMP1]] // float test_float_pre_dc() { @@ -70,23 +45,14 @@ float test_float_pre_dc() return --n; } -// SAFE-LABEL: define dso_local float @test_float_pre_inc( -// SAFE-SAME: ) 
#[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4 -// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// SAFE-NEXT: ret float [[TMP1]] -// -// UNSAFE-LABEL: define dso_local float @test_float_pre_inc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] -// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// UNSAFE-NEXT: ret float [[TMP1]] +// CHECK-LABEL: define dso_local float @test_float_pre_inc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// CHECK-NEXT: ret float [[TMP1]] // float test_float_pre_inc() { @@ -94,25 +60,15 @@ float test_float_pre_inc() return ++n; } -// SAFE-LABEL: define dso_local double @test_double_post_inc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = 
atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8 -// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: ret double [[TMP1]] -// -// UNSAFE-LABEL: define dso_local double @test_double_post_inc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] -// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: ret double [[TMP1]] +// CHECK-LABEL: define dso_local double @test_double_post_inc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// CHECK-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: ret double [[TMP1]] // double test_double_post_inc() { @@ -120,25 +76,15 @@ double test_double_post_inc() return n++; } -// SAFE-LABEL: define dso_local double @test_double_post_dc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr 
-// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8 -// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: ret double [[TMP1]] -// -// UNSAFE-LABEL: define dso_local double @test_double_post_dc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]] -// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: ret double [[TMP1]] +// CHECK-LABEL: define dso_local double @test_double_post_dc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// CHECK-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: ret double [[TMP1]] // double test_double_post_dc() { @@ -146,27 +92,16 @@ double test_double_post_dc() return n--; } -// SAFE-LABEL: define dso_local double @test_double_pre_dc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: 
[[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8 -// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: ret double [[TMP2]] -// -// UNSAFE-LABEL: define dso_local double @test_double_pre_dc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]] -// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: ret double [[TMP2]] +// CHECK-LABEL: define dso_local double @test_double_pre_dc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: ret double [[TMP2]] // double test_double_pre_dc() { @@ -174,27 +109,16 @@ double test_double_pre_dc() return --n; } -// SAFE-LABEL: define dso_local double @test_double_pre_inc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] 
-// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8 -// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// SAFE-NEXT: ret double [[TMP2]] -// -// UNSAFE-LABEL: define dso_local double @test_double_pre_inc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] -// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 -// UNSAFE-NEXT: ret double [[TMP2]] +// CHECK-LABEL: define dso_local double @test_double_pre_inc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// CHECK-NEXT: ret double [[TMP2]] 
// double test_double_pre_inc() { @@ -202,25 +126,15 @@ double test_double_pre_inc() return ++n; } -// SAFE-LABEL: define dso_local half @test__Float16_post_inc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2 -// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: ret half [[TMP1]] -// -// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] -// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: ret half [[TMP1]] +// CHECK-LABEL: define dso_local half @test__Float16_post_inc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// CHECK-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: 
ret half [[TMP1]] // _Float16 test__Float16_post_inc() { @@ -228,25 +142,15 @@ _Float16 test__Float16_post_inc() return n++; } -// SAFE-LABEL: define dso_local half @test__Float16_post_dc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2 -// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: ret half [[TMP1]] -// -// UNSAFE-LABEL: define dso_local half @test__Float16_post_dc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]] -// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: ret half [[TMP1]] +// CHECK-LABEL: define dso_local half @test__Float16_post_dc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// CHECK-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: ret half [[TMP1]] 
// _Float16 test__Float16_post_dc() { @@ -254,27 +158,16 @@ _Float16 test__Float16_post_dc() return n--; } -// SAFE-LABEL: define dso_local half @test__Float16_pre_dc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2 -// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: ret half [[TMP2]] -// -// UNSAFE-LABEL: define dso_local half @test__Float16_pre_dc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]] -// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: ret half [[TMP2]] +// CHECK-LABEL: define dso_local half @test__Float16_pre_dc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// 
CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: ret half [[TMP2]] // _Float16 test__Float16_pre_dc() { @@ -282,35 +175,22 @@ _Float16 test__Float16_pre_dc() return --n; } -// SAFE-LABEL: define dso_local half @test__Float16_pre_inc( -// SAFE-SAME: ) #[[ATTR0]] { -// SAFE-NEXT: [[ENTRY:.*:]] -// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2 -// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// SAFE-NEXT: ret half [[TMP2]] -// -// UNSAFE-LABEL: define dso_local half @test__Float16_pre_inc( -// UNSAFE-SAME: ) #[[ATTR0]] { -// UNSAFE-NEXT: [[ENTRY:.*:]] -// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) -// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] -// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 -// UNSAFE-NEXT: ret half [[TMP2]] +// CHECK-LABEL: define dso_local half @test__Float16_pre_inc( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// 
CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// CHECK-NEXT: ret half [[TMP2]] // _Float16 test__Float16_pre_inc() { static _Atomic _Float16 n; return ++n; } -//. -// UNSAFE: [[META3]] = !{} -//. //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -// CHECK: {{.*}} +// SAFE: {{.*}} +// UNSAFE: {{.*}} diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu index 8bf8241e343e7..1725b67c104d7 100644 --- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu +++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -1,19 +1,19 @@ // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ -// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=CHECK,SAFEIR %s +// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=FUN,CHECK,SAFEIR %s // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ -// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=CHECK,UNSAFEIR %s +// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=FUN,CHECK,UNSAFEIR %s // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \ -// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s +// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=FUN,SAFE %s // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \ // RUN: 
-fcuda-is-device -target-cpu gfx940 -fnative-half-type \ // RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics \ -// RUN: | FileCheck -check-prefix=UNSAFE %s +// RUN: | FileCheck -check-prefixes=FUN,UNSAFE %s // REQUIRES: amdgpu-registered-target @@ -21,30 +21,28 @@ #include __global__ void ffp1(float *p) { - // CHECK-LABEL: @_Z4ffp1Pf - // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}} - // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}} - - // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - - // SAFE: _Z4ffp1Pf - // SAFE: global_atomic_cmpswap - // SAFE: global_atomic_cmpswap - // SAFE: global_atomic_cmpswap - // SAFE: global_atomic_cmpswap - // SAFE: global_atomic_cmpswap + // FUN-LABEL: @_Z4ffp1Pf + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, [[DEFMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+$]] + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, 
[[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, [[DEFMD]] + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.no.remote.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, [[DEFMD:!amdgpu.no.fine.grained.memory ![0-9]+, !amdgpu.no.remote.memory ![0-9]+$]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, [[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, [[DEFMD]] + + // SAFE: global_atomic_add_f32 // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_max + // SAFE: global_atomic_min + // SAFE: global_atomic_max + // SAFE: global_atomic_min - // UNSAFE: _Z4ffp1Pf // UNSAFE: global_atomic_add_f32 // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap @@ -61,22 +59,21 @@ __global__ void ffp1(float *p) { } __global__ void ffp2(double *p) { - // CHECK-LABEL: @_Z4ffp2Pd - // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}} - - // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - - // SAFE-LABEL: @_Z4ffp2Pd + // FUN-LABEL: @_Z4ffp2Pd + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, [[DEFMD]] + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, [[DEFMD]] + // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 @@ -84,7 +81,6 @@ __global__ void ffp2(double *p) { // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 - // UNSAFE-LABEL: @_Z4ffp2Pd // UNSAFE: global_atomic_add_f64 // UNSAFE: global_atomic_cmpswap_x2 // UNSAFE: global_atomic_max_f64 @@ -101,28 +97,27 @@ __global__ void ffp2(double *p) { // long double is the same as double for amdgcn. 
__global__ void ffp3(long double *p) { - // CHECK-LABEL: @_Z4ffp3Pe - // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}} - - // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - - // SAFE-LABEL: @_Z4ffp3Pe + // FUN-LABEL: @_Z4ffp3Pe + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, [[DEFMD]] + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, 
[[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, [[DEFMD]] + // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 - // UNSAFE-LABEL: @_Z4ffp3Pe + // UNSAFE: global_atomic_cmpswap_x2 // UNSAFE: global_atomic_max_f64 // UNSAFE: global_atomic_min_f64 @@ -137,38 +132,37 @@ __global__ void ffp3(long double *p) { } __device__ double ffp4(double *p, float f) { - // CHECK-LABEL: @_Z4ffp4Pdf + // FUN-LABEL: @_Z4ffp4Pdf // CHECK: fpext float {{.*}} to double - // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} - // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] return __atomic_fetch_sub(p, f, memory_order_relaxed); } __device__ double ffp5(double *p, int i) { - // CHECK-LABEL: @_Z4ffp5Pdi + // FUN-LABEL: @_Z4ffp5Pdi // CHECK: sitofp i32 {{.*}} to double - // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} - // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] return __atomic_fetch_sub(p, i, memory_order_relaxed); } __global__ void ffp6(_Float16 *p) { - // CHECK-LABEL: @_Z4ffp6PDF16 - // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}} - // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}} - // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 
2{{$}} - // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2{{$}} - - // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} - - // SAFE: _Z4ffp6PDF16 + // FUN-LABEL: @_Z4ffp6PDF16 + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, [[DEFMD]] + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, [[DEFMD]] + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, [[DEFMD]] + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, [[DEFMD]] + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, [[DEFMD]] + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, [[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, [[DEFMD]] + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, [[DEFMD]] + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, [[DEFMD]] + // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap @@ -176,7 +170,6 @@ __global__ void ffp6(_Float16 *p) { // SAFE: global_atomic_cmpswap // SAFE: 
global_atomic_cmpswap - // UNSAFE: _Z4ffp6PDF16 // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap diff --git a/clang/test/CodeGenCUDA/atomic-options.hip b/clang/test/CodeGenCUDA/atomic-options.hip new file mode 100644 index 0000000000000..503d5f6939af7 --- /dev/null +++ b/clang/test/CodeGenCUDA/atomic-options.hip @@ -0,0 +1,449 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=DEV %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fatomic=no_fine_grained_memory:off,no_remote_memory:on,ignore_denormal_mode:on \ +// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=OPT %s + +#include "Inputs/cuda.h" + +// HOST-LABEL: define dso_local void @_Z12test_defaultPf( +// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 +// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 +// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: ret void +// +// DEV-LABEL: define dso_local void @_Z12test_defaultPf( +// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// DEV-NEXT: [[ENTRY:.*:]] +// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 8, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]] +// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: ret void +// +// OPT-LABEL: define dso_local void @_Z12test_defaultPf( +// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP1:%.*]] = load float, 
ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]] +// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: ret void +// +__device__ __host__ void test_default(float *a) { + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// HOST-LABEL: define dso_local void @_Z8test_onePf( +// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 +// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 +// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: ret void +// +// DEV-LABEL: define dso_local void @_Z8test_onePf( +// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// DEV-NEXT: [[ENTRY:.*:]] +// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// 
DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] +// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: ret void +// +// OPT-LABEL: define dso_local void @_Z8test_onePf( +// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] +// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: ret void +// +__device__ __host__ void test_one(float *a) { + #pragma clang atomic 
no_remote_memory(on) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// HOST-LABEL: define dso_local void @_Z8test_twoPf( +// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 +// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 +// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: ret void +// +// DEV-LABEL: define dso_local void @_Z8test_twoPf( +// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// DEV-NEXT: [[ENTRY:.*:]] +// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], 
!amdgpu.ignore.denormal.mode [[META4]] +// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: ret void +// +// OPT-LABEL: define dso_local void @_Z8test_twoPf( +// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]] +// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: ret void +// +__device__ __host__ void test_two(float *a) { + #pragma clang atomic no_remote_memory(off) ignore_denormal_mode(on) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// HOST-LABEL: define dso_local void @_Z10test_threePf( +// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 +// HOST-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 +// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: ret void +// +// DEV-LABEL: define dso_local void @_Z10test_threePf( +// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// DEV-NEXT: [[ENTRY:.*:]] +// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory [[META4]] +// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: ret void +// +// OPT-LABEL: define dso_local void @_Z10test_threePf( +// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// 
OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory [[META4]] +// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: ret void +// +__device__ __host__ void test_three(float *a) { + #pragma clang atomic no_remote_memory(on) no_fine_grained_memory(off) ignore_denormal_mode(off) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +// HOST-LABEL: define dso_local void @_Z11test_nestedPf( +// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4 +// HOST-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4 +// HOST-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4 +// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: 
store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 +// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1]], align 4 +// HOST-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1]], align 4 +// HOST-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] seq_cst, align 4 +// HOST-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2]], align 4 +// HOST-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2]], align 4 +// HOST-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3]], align 4 +// HOST-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3]], align 4 +// HOST-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] acquire, align 4 +// HOST-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4]], align 4 +// HOST-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4]], align 4 +// HOST-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5]], align 4 +// HOST-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5]], align 4 +// HOST-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] release, align 4 +// HOST-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6]], align 4 +// HOST-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6]], align 4 +// HOST-NEXT: ret void +// +// DEV-LABEL: define dso_local void @_Z11test_nestedPf( +// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// DEV-NEXT: [[ENTRY:.*:]] +// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: 
[[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// DEV-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr +// DEV-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr +// DEV-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr +// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] +// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], 
align 4 +// DEV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// DEV-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// DEV-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4 +// DEV-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// DEV-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// DEV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4 +// DEV-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4 +// DEV-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup-one-as") acquire, align 4, !amdgpu.no.remote.memory [[META4]] +// DEV-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// DEV-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// DEV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4 +// DEV-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4 +// DEV-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront-one-as") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// DEV-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 +// DEV-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 +// DEV-NEXT: ret void +// +// OPT-LABEL: define dso_local void @_Z11test_nestedPf( +// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP1:%.*]] = alloca 
float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// OPT-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr +// OPT-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr +// OPT-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr +// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] +// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 
2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// OPT-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// OPT-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4 +// OPT-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// OPT-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// OPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4 +// OPT-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4 +// OPT-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup-one-as") acquire, align 4, !amdgpu.no.remote.memory [[META4]] +// OPT-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// OPT-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// OPT-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4 +// OPT-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4 +// OPT-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront-one-as") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// OPT-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 +// OPT-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 +// OPT-NEXT: ret void +// +__device__ __host__ void test_nested(float *a) { + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); + { + #pragma clang atomic no_remote_memory(off) no_fine_grained_memory(off) ignore_denormal_mode(off) + __scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE); + { + #pragma clang atomic no_remote_memory(on) + __scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP); + } + { + #pragma clang atomic no_fine_grained_memory(on) + 
__scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT); + } + } +} + +// +// HOST-LABEL: define weak_odr void @_Z13test_templateIfEvPT_( +// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] comdat { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 +// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 +// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 +// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 +// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 +// HOST-NEXT: ret void +// +// DEV-LABEL: define internal void @_Z13test_templateIfEvPT_( +// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] comdat { +// DEV-NEXT: [[ENTRY:.*:]] +// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory 
[[META4]] +// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// DEV-NEXT: ret void +// +// OPT-LABEL: define internal void @_Z13test_templateIfEvPT_( +// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] comdat { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] syncscope("one-as") monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] +// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// OPT-NEXT: ret void +// +template __device__ __host__ void test_template(T *a) { + #pragma clang atomic no_remote_memory(on) no_fine_grained_memory(off) + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +template __device__ __host__ void test_template(float *a); + +//. +// DEV: [[META4]] = !{} +//. +// OPT: [[META4]] = !{} +//. 
diff --git a/clang/test/Driver/atomic-options.hip b/clang/test/Driver/atomic-options.hip new file mode 100644 index 0000000000000..a79818f0484b7 --- /dev/null +++ b/clang/test/Driver/atomic-options.hip @@ -0,0 +1,31 @@ +// RUN: %clang -### -nogpulib -nogpuinc %s \ +// RUN: -fatomic=no_fine_grained_memory:off,no_remote_memory:on,ignore_denormal_mode:on \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-VALID + +// CHECK-VALID: "-cc1" {{.*}}"-triple" "amdgcn-amd-amdhsa" {{.*}}"-fatomic=no_fine_grained_memory:off,no_remote_memory:on,ignore_denormal_mode:on" +// CHECK-VALID: "-cc1" {{.*}}"-triple" {{.*}}"-fatomic=no_fine_grained_memory:off,no_remote_memory:on,ignore_denormal_mode:on" + +// RUN: not %clang -### -nogpulib -nogpuinc %s \ +// RUN: -fatomic=invalid_key:on 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID-KEY + +// CHECK-INVALID-KEY: clang: error: invalid argument 'invalid_key:on' to -fatomic=; must be a comma-separated list of key:value pairs, where allowed keys are 'no_fine_grained_memory', 'no_remote_memory', 'ignore_denormal_mode', and values are 'on' or 'off', and each key must be unique + +// RUN: not %clang -### -nogpulib -nogpuinc %s \ +// RUN: -fatomic=no_fine_grained_memory:invalid 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID-VALUE + +// CHECK-INVALID-VALUE: clang: error: invalid argument 'no_fine_grained_memory:invalid' to -fatomic=; must be a comma-separated list of key:value pairs, where allowed keys are 'no_fine_grained_memory', 'no_remote_memory', 'ignore_denormal_mode', and values are 'on' or 'off', and each key must be unique + +// RUN: not %clang -### -nogpulib -nogpuinc %s \ +// RUN: -fatomic=no_fine_grained_memory 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING-VALUE + +// CHECK-MISSING-VALUE: clang: error: invalid argument 'no_fine_grained_memory' to -fatomic=; must be a comma-separated list of key:value pairs, where allowed keys are 'no_fine_grained_memory', 'no_remote_memory', 'ignore_denormal_mode', and values are 'on' or 'off', 
and each key must be unique + +// RUN: not %clang -### -nogpulib -nogpuinc %s \ +// RUN: -fatomic=no_fine_grained_memory:on,no_fine_grained_memory:off 2>&1 | FileCheck %s --check-prefix=CHECK-DUPLICATE-KEY + +// CHECK-DUPLICATE-KEY: clang: error: invalid argument 'no_fine_grained_memory:off' to -fatomic=; must be a comma-separated list of key:value pairs, where allowed keys are 'no_fine_grained_memory', 'no_remote_memory', 'ignore_denormal_mode', and values are 'on' or 'off', and each key must be unique + +// RUN: %clang -### -nogpulib -nogpuinc %s \ +// RUN: -fatomic= 2>&1 | FileCheck %s --check-prefix=CHECK-EMPTY + +// CHECK-EMPTY: clang: warning: joined argument expects additional value: '-fatomic=' [-Wunused-command-line-argument] diff --git a/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp b/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp index 7a34113cec8fa..60d7cb008a368 100644 --- a/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp +++ b/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp @@ -11,7 +11,7 @@ double dv, dx; // DEFAULT-SAME: ) #[[ATTR0:[0-9]+]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 -// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4 +// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.no.remote.memory [[META5]] // DEFAULT-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]] // DEFAULT-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 // DEFAULT-NEXT: ret void @@ -20,7 +20,7 @@ double dv, dx; // UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0:[0-9]+]] { // UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]] // UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 -// UNSAFE-FP-ATOMICS-NEXT: 
[[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.ignore.denormal.mode [[META5]] +// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.no.remote.memory [[META5]], !amdgpu.ignore.denormal.mode [[META5]] // UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]] // UNSAFE-FP-ATOMICS-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 // UNSAFE-FP-ATOMICS-NEXT: ret void @@ -34,7 +34,7 @@ void atomic_fadd_f32() { // DEFAULT-SAME: ) #[[ATTR0]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8 -// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8 +// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]], !amdgpu.no.remote.memory [[META5]] // DEFAULT-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]] // DEFAULT-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8 // DEFAULT-NEXT: ret void @@ -43,7 +43,7 @@ void atomic_fadd_f32() { // UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0]] { // UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]] // UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8 -// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, 
!amdgpu.no.fine.grained.memory [[META5]], !amdgpu.no.remote.memory [[META5]] // UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]] // UNSAFE-FP-ATOMICS-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8 // UNSAFE-FP-ATOMICS-NEXT: ret void @@ -55,5 +55,7 @@ void atomic_fadd_f64() { #pragma omp end declare target //. +// DEFAULT: [[META5]] = !{} +//. // UNSAFE-FP-ATOMICS: [[META5]] = !{} //. diff --git a/clang/test/Parser/Inputs/cuda.h b/clang/test/Parser/Inputs/cuda.h new file mode 100644 index 0000000000000..405ef8bb807d9 --- /dev/null +++ b/clang/test/Parser/Inputs/cuda.h @@ -0,0 +1,54 @@ +/* Minimal declarations for CUDA support. Testing purposes only. */ + +#include <stddef.h> + +// Make this file work with nvcc, for testing compatibility. + +#ifndef __NVCC__ +#define __constant__ __attribute__((constant)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __host__ __attribute__((host)) +#define __shared__ __attribute__((shared)) +#define __managed__ __attribute__((managed)) +#define __launch_bounds__(...) 
__attribute__((launch_bounds(__VA_ARGS__))) + +struct dim3 { + unsigned x, y, z; + __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} +}; + +#ifdef __HIP__ +typedef struct hipStream *hipStream_t; +typedef enum hipError {} hipError_t; +int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, + hipStream_t stream = 0); +extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + hipStream_t stream = 0); +extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, + hipStream_t stream); +#else +typedef struct cudaStream *cudaStream_t; +typedef enum cudaError {} cudaError_t; + +extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, cudaStream_t stream); +#endif + +// Host- and device-side placement new overloads. 
+void *operator new(__SIZE_TYPE__, void *p) { return p; } +void *operator new[](__SIZE_TYPE__, void *p) { return p; } +__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; } +__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; } + +#endif // !__NVCC__ diff --git a/clang/test/Parser/atomic-options.hip b/clang/test/Parser/atomic-options.hip new file mode 100644 index 0000000000000..5e75d7e50b01a --- /dev/null +++ b/clang/test/Parser/atomic-options.hip @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -verify -fcuda-is-device %s +// RUN: %clang_cc1 -fsyntax-only -verify -fcuda-is-device %s \ +// RUN: -fatomic=no_fine_grained_memory:off,no_remote_memory:on,ignore_denormal_mode:on + +#include "Inputs/cuda.h" + +#pragma clang atomic no_remote_memory(off) // expected-error {{'#pragma clang atomic' can only appear at the start of a compound statement}} + +__device__ __host__ void test_location(float *a) { + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); + #pragma clang atomic no_remote_memory(off) // expected-error {{'#pragma clang atomic' can only appear at the start of a compound statement}} +} + +__device__ __host__ void test_invalid_option(float *a) { + #pragma clang atomic fast(on) // expected-error {{invalid option 'fast'; expected 'no_remote_memory', 'no_fine_grained_memory', or 'ignore_denormal_mode'}} + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +__device__ __host__ void test_invalid_value(float *a) { + #pragma clang atomic no_remote_memory(default) // expected-error {{unexpected argument 'default' to '#pragma clang atomic no_remote_memory'; expected 'on' or 'off'}} + __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +} + +__device__ __host__ void test_extra_token(float *a) { + #pragma clang atomic no_remote_memory(on) * // expected-warning {{extra tokens at end of '#pragma clang atomic' - ignored}} + 
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); +}