Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit aa1d706

Browse files
SC llvm team
authored and committed
Merged main:eb02ee44d325 into amd-gfx:0cc6e26f9eb2
Local branch amd-gfx: 0cc6e26 Merged main:b3cb4f069c2c into amd-gfx:60ce4094065a
Remote branch main: eb02ee4 [AArch64] Move PAuth codegen down the machine pipeline
2 parents 0cc6e26 + eb02ee4 commit aa1d706

File tree

60 files changed

+4414
-801
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

60 files changed

+4414
-801
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1163,7 +1163,7 @@ caveats to this use of name mangling:
11631163
* The ``overloadable`` attribute has almost no meaning when used in C++,
11641164
because names will already be mangled and functions are already overloadable.
11651165
However, when an ``overloadable`` function occurs within an ``extern "C"``
1166-
linkage specification, it's name *will* be mangled in the same way as it
1166+
linkage specification, its name *will* be mangled in the same way as it
11671167
would in C.
11681168

11691169
For the purpose of backwards compatibility, at most one function with the same
Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
! RUN: bbc -fopenmp -pft-test -o %t %s | FileCheck %s
2+
! RUN: %flang_fc1 -fopenmp -fdebug-dump-pft -o %t %s | FileCheck %s
3+
4+
! Loop constructs always have an `end do` which can be the target of
5+
! a branch. So OpenMP loop constructs do not need an artificial
6+
! continue inserted for a target.
7+
8+
!CHECK-LABEL: sb0
9+
!CHECK-NOT: continue
10+
subroutine sb0(cond)
11+
implicit none
12+
logical :: cond
13+
integer :: i
14+
!$omp parallel do
15+
do i = 1, 20
16+
if( cond) then
17+
cycle
18+
end if
19+
end do
20+
return
21+
end subroutine
22+
23+
!CHECK-LABEL: sb1
24+
!CHECK-NOT: continue
25+
subroutine sb1(cond)
26+
implicit none
27+
logical :: cond
28+
integer :: i
29+
!$omp parallel do
30+
do i = 1, 20
31+
if( cond) then
32+
cycle
33+
end if
34+
end do
35+
!$omp end parallel do
36+
return
37+
end subroutine
38+
39+
!CHECK-LABEL: sb2
40+
!CHECK-NOT: continue
41+
subroutine sb2
42+
integer :: i, n
43+
integer :: tmp
44+
45+
!$omp parallel do
46+
do ifld=1,n
47+
do isum=1,n
48+
if (tmp > n) then
49+
exit
50+
endif
51+
enddo
52+
tmp = n
53+
enddo
54+
end subroutine
55+
56+
!CHECK-LABEL: sb3
57+
!CHECK-NOT: continue
58+
subroutine sb3
59+
integer :: i, n
60+
integer :: tmp
61+
62+
!$omp parallel do
63+
do ifld=1,n
64+
do isum=1,n
65+
if (tmp > n) then
66+
exit
67+
endif
68+
enddo
69+
enddo
70+
end subroutine
Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
! RUN: bbc -fopenmp -pft-test -o %t %s | FileCheck %s
2+
! RUN: %flang_fc1 -fopenmp -fdebug-dump-pft -o %t %s | FileCheck %s
3+
4+
! Test structure of the Pre-FIR tree with OpenMP
5+
6+
subroutine sub1(a, b, n)
7+
real :: a(:), b(:)
8+
integer :: n, i
9+
!$omp parallel do
10+
do i = 1, n
11+
b(i) = exp(a(i))
12+
end do
13+
!$omp end parallel do
14+
end subroutine
15+
16+
! CHECK-LABEL: Subroutine sub1
17+
! CHECK: <<OpenMPConstruct>>
18+
! CHECK: <<DoConstruct>>
19+
! CHECK: <<End OpenMPConstruct>>
Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
! RUN: %flang_fc1 -fdebug-pre-fir-tree -fopenmp %s | FileCheck %s
2+
3+
subroutine openmp_sections(x, y)
4+
5+
integer, intent(inout)::x, y
6+
7+
!==============================================================================
8+
! empty construct
9+
!==============================================================================
10+
!$omp sections
11+
!$omp end sections
12+
13+
!CHECK: OpenMPConstruct
14+
!CHECK: End OpenMPConstruct
15+
16+
!==============================================================================
17+
! single section, without `!$omp section`
18+
!==============================================================================
19+
!$omp sections
20+
call F1()
21+
!$omp end sections
22+
23+
!CHECK: OpenMPConstruct
24+
!CHECK: OpenMPConstruct
25+
!CHECK: CallStmt
26+
!CHECK: End OpenMPConstruct
27+
!CHECK: End OpenMPConstruct
28+
29+
!==============================================================================
30+
! single section with `!$omp section`
31+
!==============================================================================
32+
!$omp sections
33+
!$omp section
34+
call F1
35+
!$omp end sections
36+
37+
!CHECK: OpenMPConstruct
38+
!CHECK: OpenMPConstruct
39+
!CHECK: CallStmt
40+
!CHECK: End OpenMPConstruct
41+
!CHECK: End OpenMPConstruct
42+
43+
!==============================================================================
44+
! multiple sections
45+
!==============================================================================
46+
!$omp sections
47+
!$omp section
48+
call F1
49+
!$omp section
50+
call F2
51+
!$omp section
52+
call F3
53+
!$omp end sections
54+
55+
!CHECK: OpenMPConstruct
56+
!CHECK: OpenMPConstruct
57+
!CHECK: CallStmt
58+
!CHECK: End OpenMPConstruct
59+
!CHECK: OpenMPConstruct
60+
!CHECK: CallStmt
61+
!CHECK: End OpenMPConstruct
62+
!CHECK: OpenMPConstruct
63+
!CHECK: CallStmt
64+
!CHECK: End OpenMPConstruct
65+
!CHECK: End OpenMPConstruct
66+
67+
!==============================================================================
68+
! multiple sections with clauses
69+
!==============================================================================
70+
!$omp sections PRIVATE(x) FIRSTPRIVATE(y)
71+
!$omp section
72+
call F1
73+
!$omp section
74+
call F2
75+
!$omp section
76+
call F3
77+
!$omp end sections NOWAIT
78+
79+
!CHECK: OpenMPConstruct
80+
!CHECK: OpenMPConstruct
81+
!CHECK: CallStmt
82+
!CHECK: End OpenMPConstruct
83+
!CHECK: OpenMPConstruct
84+
!CHECK: CallStmt
85+
!CHECK: End OpenMPConstruct
86+
!CHECK: OpenMPConstruct
87+
!CHECK: CallStmt
88+
!CHECK: End OpenMPConstruct
89+
!CHECK: End OpenMPConstruct
90+
91+
end subroutine openmp_sections

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 475603
19+
#define LLVM_MAIN_REVISION 475620
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ HANDLE_TARGET_OPCODE(SUBREG_TO_REG)
7272
/// virtual registers have been created for all the instructions, and it's
7373
/// only needed in cases where the register classes implied by the
7474
/// instructions are insufficient. It is emitted as a COPY MachineInstr.
75-
HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS)
75+
HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS)
7676

7777
/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
7878
HANDLE_TARGET_OPCODE(DBG_VALUE)
@@ -103,11 +103,11 @@ HANDLE_TARGET_OPCODE(DBG_LABEL)
103103
/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
104104
/// After register coalescing references of v1024 should be replaced with
105105
/// v1027:3, v1025 with v1027:4, etc.
106-
HANDLE_TARGET_OPCODE(REG_SEQUENCE)
106+
HANDLE_TARGET_OPCODE(REG_SEQUENCE)
107107

108108
/// COPY - Target-independent register copy. This instruction can also be
109109
/// used to copy between subregisters of virtual registers.
110-
HANDLE_TARGET_OPCODE(COPY)
110+
HANDLE_TARGET_OPCODE(COPY)
111111

112112
/// BUNDLE - This instruction represents an instruction bundle. Instructions
113113
/// which immediately follow a BUNDLE instruction which are marked with

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5700,7 +5700,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
57005700
if (OpOpcode == ISD::TRUNCATE) {
57015701
SDValue OpOp = N1.getOperand(0);
57025702
if (OpOp.getValueType() == VT) {
5703-
if (OpOp.getOpcode() == ISD::AssertZext && N1->hasOneUse()) {
5703+
if (OpOp.getOpcode() == ISD::AssertZext) {
57045704
APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
57055705
N1.getScalarValueSizeInBits());
57065706
if (MaskedValueIsZero(OpOp, HiBits)) {

llvm/lib/CodeGen/StackColoring.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -709,7 +709,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
709709
if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
710710
for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
711711
for (WinEHHandlerType &H : TBME.HandlerArray)
712-
if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max())
712+
if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
713+
H.CatchObj.FrameIndex >= 0)
713714
ConservativeSlots.set(H.CatchObj.FrameIndex);
714715

715716
LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ FunctionPass *createAArch64A57FPLoadBalancing();
5151
FunctionPass *createAArch64A53Fix835769();
5252
FunctionPass *createFalkorHWPFFixPass();
5353
FunctionPass *createFalkorMarkStridedAccessesPass();
54+
FunctionPass *createAArch64PointerAuthPass();
5455
FunctionPass *createAArch64BranchTargetsPass();
5556
FunctionPass *createAArch64MIPeepholeOptPass();
5657

@@ -74,6 +75,7 @@ ModulePass *createAArch64GlobalsTaggingPass();
7475
void initializeAArch64A53Fix835769Pass(PassRegistry&);
7576
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
7677
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
78+
void initializeAArch64PointerAuthPass(PassRegistry&);
7779
void initializeAArch64BranchTargetsPass(PassRegistry&);
7880
void initializeAArch64CFIFixupPass(PassRegistry&);
7981
void initializeAArch64CollectLOHPass(PassRegistry &);

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 13 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -1381,42 +1381,6 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
13811381
.setMIFlags(MachineInstr::FrameSetup);
13821382
}
13831383

1384-
void AArch64FrameLowering::signLR(MachineFunction &MF, MachineBasicBlock &MBB,
1385-
MachineBasicBlock::iterator MBBI,
1386-
bool NeedsWinCFI, bool *HasWinCFI) {
1387-
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
1388-
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1389-
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1390-
bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF);
1391-
1392-
// Debug location must be unknown, see emitPrologue().
1393-
DebugLoc DL;
1394-
1395-
if (MFnI.shouldSignWithBKey()) {
1396-
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
1397-
.setMIFlag(MachineInstr::FrameSetup);
1398-
}
1399-
1400-
// No SEH opcode for this one; it doesn't materialize into an
1401-
// instruction on Windows.
1402-
BuildMI(
1403-
MBB, MBBI, DL,
1404-
TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP : AArch64::PACIASP))
1405-
.setMIFlag(MachineInstr::FrameSetup);
1406-
1407-
if (EmitCFI) {
1408-
unsigned CFIIndex =
1409-
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
1410-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1411-
.addCFIIndex(CFIIndex)
1412-
.setMIFlags(MachineInstr::FrameSetup);
1413-
} else if (NeedsWinCFI) {
1414-
*HasWinCFI = true;
1415-
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
1416-
.setMIFlag(MachineInstr::FrameSetup);
1417-
}
1418-
}
1419-
14201384
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
14211385
MachineBasicBlock &MBB) const {
14221386
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1450,8 +1414,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
14501414
emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
14511415
MFnI.needsDwarfUnwindInfo(MF));
14521416

1453-
if (MFnI.shouldSignReturnAddress(MF))
1454-
signLR(MF, MBB, MBBI, NeedsWinCFI, &HasWinCFI);
1417+
if (MFnI.shouldSignReturnAddress(MF)) {
1418+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
1419+
.setMIFlag(MachineInstr::FrameSetup);
1420+
if (NeedsWinCFI)
1421+
HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
1422+
}
14551423

14561424
if (EmitCFI && MFnI.isMTETagged()) {
14571425
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
@@ -1911,54 +1879,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19111879
}
19121880
}
19131881

1914-
void AArch64FrameLowering::authenticateLR(MachineFunction &MF,
1915-
MachineBasicBlock &MBB,
1916-
bool NeedsWinCFI, bool *HasWinCFI) {
1917-
const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
1918-
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1919-
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1920-
bool EmitAsyncCFI = MFI.needsAsyncDwarfUnwindInfo(MF);
1921-
1922-
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1923-
DebugLoc DL;
1924-
if (MBBI != MBB.end())
1925-
DL = MBBI->getDebugLoc();
1926-
1927-
// The AUTIASP instruction assembles to a hint instruction before v8.3a so
1928-
// this instruction can safely used for any v8a architecture.
1929-
// From v8.3a onwards there are optimised authenticate LR and return
1930-
// instructions, namely RETA{A,B}, that can be used instead. In this case the
1931-
// DW_CFA_AARCH64_negate_ra_state can't be emitted.
1932-
bool TerminatorIsCombinable =
1933-
MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::RET_ReallyLR ||
1934-
MBBI->getOpcode() == AArch64::RET);
1935-
if (Subtarget.hasPAuth() && TerminatorIsCombinable && !NeedsWinCFI &&
1936-
!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
1937-
BuildMI(MBB, MBBI, DL,
1938-
TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
1939-
.copyImplicitOps(*MBBI);
1940-
MBB.erase(MBBI);
1941-
} else {
1942-
BuildMI(
1943-
MBB, MBBI, DL,
1944-
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
1945-
.setMIFlag(MachineInstr::FrameDestroy);
1946-
1947-
if (EmitAsyncCFI) {
1948-
unsigned CFIIndex =
1949-
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
1950-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1951-
.addCFIIndex(CFIIndex)
1952-
.setMIFlags(MachineInstr::FrameDestroy);
1953-
}
1954-
if (NeedsWinCFI) {
1955-
*HasWinCFI = true;
1956-
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
1957-
.setMIFlag(MachineInstr::FrameDestroy);
1958-
}
1959-
}
1960-
}
1961-
19621882
static bool isFuncletReturnInstr(const MachineInstr &MI) {
19631883
switch (MI.getOpcode()) {
19641884
default:
@@ -1990,8 +1910,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
19901910
MachineBasicBlock::iterator EpilogStartI = MBB.end();
19911911

19921912
auto FinishingTouches = make_scope_exit([&]() {
1993-
if (AFI->shouldSignReturnAddress(MF))
1994-
authenticateLR(MF, MBB, NeedsWinCFI, &HasWinCFI);
1913+
if (AFI->shouldSignReturnAddress(MF)) {
1914+
BuildMI(MBB, MBB.getFirstTerminator(), DL,
1915+
TII->get(AArch64::PAUTH_EPILOGUE))
1916+
.setMIFlag(MachineInstr::FrameDestroy);
1917+
if (NeedsWinCFI)
1918+
HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
1919+
}
19951920
if (needsShadowCallStackPrologueEpilogue(MF))
19961921
emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
19971922
if (EmitCFI)

0 commit comments

Comments
 (0)