From a6105547287ae5ee7941620853a2c2910fe336a8 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 4 Mar 2020 17:11:40 +0100 Subject: [PATCH 1/4] [SimplifyCFG] Skip merging return blocks if it would break a CallBr. SimplifyCFG should not merge empty return blocks and leave a CallBr behind with a duplicated destination since the verifier will then trigger an assert. This patch checks for this case and avoids the transformation. CodeGenPrepare has a similar check which also has a FIXME comment about why this is needed. It seems perhaps better if these two passes would eventually instead update the CallBr instruction instead of just checking and avoiding. This fixes https://bugs.llvm.org/show_bug.cgi?id=45062. Review: Craig Topper Differential Revision: https://reviews.llvm.org/D75620 (cherry picked from commit c2dafe12dc24f7f1326f5c4c6a3b23f1485f1bd6) --- .../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 15 ++++++++++ .../SimplifyCFG/callbr-destinations.ll | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 623a8b711ed89..ac53ff33e8362 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -104,6 +104,21 @@ static bool mergeEmptyReturnBlocks(Function &F) { continue; } + // Skip merging if this would result in a CallBr instruction with a + // duplicate destination. FIXME: See note in CodeGenPrepare.cpp. + bool SkipCallBr = false; + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); + PI != E && !SkipCallBr; ++PI) { + if (auto *CBI = dyn_cast((*PI)->getTerminator())) + for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) + if (RetBlock == CBI->getSuccessor(i)) { + SkipCallBr = true; + break; + } + } + if (SkipCallBr) + continue; + // Otherwise, we found a duplicate return block. Merge the two. Changed = true; diff --git a/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll b/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll new file mode 100644 index 0000000000000..18210e5e0b060 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -simplifycfg -disable-output +; +; Test that SimplifyCFG does not cause CallBr instructions to have duplicate +; destinations, which will cause the verifier to assert. + +define void @fun0() { +entry: + callbr void asm sideeffect "", "X"(i8* blockaddress(@fun0, %bb1)) + to label %bb2 [label %bb1] + +bb1: ; preds = %bb + ret void + +bb2: ; preds = %bb + ret void +} + +define void @fun1() { +entry: + callbr void asm sideeffect "", "X"(i8* blockaddress(@fun1, %bb1)) + to label %bb2 [label %bb1] + +bb2: ; preds = %bb + ret void + +bb1: ; preds = %bb + ret void +} From c7aafffcafa05bdd76a880bc59619ec6499a7034 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 7 Apr 2020 14:45:16 +0100 Subject: [PATCH 2/4] [X86][SSE] combineX86ShufflesConstants - early out for zeroable vectors (PR45443) Shuffle combining can insert zero byte sized elements into the shuffle mask, which combineX86ShufflesConstants will attempt to fold without taking into account whether the byte-sized type is legal (e.g. AVX512F only targets). If we have a full-zeroable vector then we should just return a zero version of the root type, otherwise if the type isn't valid we should bail. Fixes PR45443 (cherry picked from commit e3b60597769f79a8abc19fb8ef1f321d9adc1358) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++++- llvm/test/CodeGen/X86/pr45443.ll | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr45443.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cbdd7135de438..60eefbc677da5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33998,6 +33998,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef Ops, return SDValue(); // Shuffle the constant bits according to the mask. + SDLoc DL(Root); APInt UndefElts(NumMaskElts, 0); APInt ZeroElts(NumMaskElts, 0); APInt ConstantElts(NumMaskElts, 0); @@ -34035,6 +34036,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef Ops, } assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue()); + // Attempt to create a zero vector. + if ((UndefElts | ZeroElts).isAllOnesValue()) + return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL); + // Create the constant data. MVT MaskSVT; if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64)) @@ -34043,8 +34048,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef Ops, MaskSVT = MVT::getIntegerVT(MaskSizeInBits); MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts); + if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) + return SDValue(); - SDLoc DL(Root); SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL); return DAG.getBitcast(VT, CstOp); } diff --git a/llvm/test/CodeGen/X86/pr45443.ll b/llvm/test/CodeGen/X86/pr45443.ll new file mode 100644 index 0000000000000..1e40ab94e9ca4 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr45443.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64 + +define <16 x float> @PR45443() { +; CHECK-LABEL: PR45443: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0 +; CHECK-NEXT: ret{{[l|q]}} +bb: + %tmp = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> , <4 x i32> ) + %tmp4 = tail call fast <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> , <16 x float> undef) + %tmp5 = icmp ult <16 x i32> %tmp, + %tmp6 = and <16 x i32> %tmp, + %tmp7 = icmp ne <16 x i32> %tmp6, zeroinitializer + %tmp8 = and <16 x i1> %tmp7, %tmp5 + %tmp9 = select fast <16 x i1> %tmp8, <16 x float> , <16 x float> %tmp4 + ret <16 x float> %tmp9 +} +declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) +declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) From 02d1f4d7ec4b7f08700b689fba2f8e5b7b2efd8c Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 24 Feb 2020 06:42:05 -0800 Subject: [PATCH 3/4] add release notes for ffp-model and ffp-exception-behavior (cherry picked from commit c8dadac228b7dd3a71d5fc25489d1b884a2b0f5e) --- clang/docs/ReleaseNotes.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a100d0a76b492..4939230a0643b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -184,6 +184,13 @@ New Compiler Flags - ``-mbranches-within-32B-boundaries`` is added as an x86 assembler mitigation for Intel's Jump Condition Code Erratum. +- -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify + the floating-point exception behavior. The default setting is ``ignore``. + +- -ffp-model={precise,strict,fast} provides the user an umbrella option to + simplify access to the many single purpose floating point options. The default + setting is ``precise``. + Deprecated Compiler Flags ------------------------- From e3f3a43807d2edcadd60ad333713206ed8fdd74e Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Fri, 13 Mar 2020 12:22:09 -0400 Subject: [PATCH 4/4] [CodeView] Align type records on 4-bytes when emitting PDBs When emitting PDBs, the TypeStreamMerger class is used to merge .debug$T records from the input .OBJ files into the output .PDB stream. Records in .OBJs are not required to be aligned on 4-bytes, and "The Netwide Assembler 2.14" generates non-aligned records. When compiling with -DLLVM_ENABLE_ASSERTIONS=ON, an assert was triggered in MergingTypeTableBuilder when non-ghash merging was used. With ghash merging there was no assert. As a result, LLD could potentially generate a non-aligned TPI stream. We now align records on 4-bytes when record indices are remapped, in TypeStreamMerger::remapIndices(). Differential Revision: https://reviews.llvm.org/D75081 (cherry picked from commit a7325298e1f311b383b8ce5ba8e2d3698fef472a) --- lld/test/COFF/pdb-tpi-aligned-records.test | 46 +++++++++++++++++++ .../CodeView/GlobalTypeTableBuilder.h | 5 ++ .../CodeView/MergingTypeTableBuilder.cpp | 4 +- .../DebugInfo/CodeView/TypeStreamMerger.cpp | 24 ++++++++-- .../DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 10 +++- 5 files changed, 83 insertions(+), 6 deletions(-) create mode 100644 lld/test/COFF/pdb-tpi-aligned-records.test diff --git a/lld/test/COFF/pdb-tpi-aligned-records.test b/lld/test/COFF/pdb-tpi-aligned-records.test new file mode 100644 index 0000000000000..cb3e412a8c202 --- /dev/null +++ b/lld/test/COFF/pdb-tpi-aligned-records.test @@ -0,0 +1,46 @@ +# RUN: yaml2obj < %s > %t.obj +# RUN: yaml2obj %p/Inputs/generic.yaml > %t2.obj + +# RUN: lld-link /out:%t.exe /debug /entry:main %t.obj %t2.obj /nodefaultlib +# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck %s +# RUN: lld-link /out:%t.exe /debug:ghash /entry:main %t.obj %t2.obj /nodefaultlib +# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck %s + +# CHECK: 0000: 12000810 03000000 00000000 00000000 0000F2F1 + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: '.debug$T' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + # It is important to keep the 'SectionData' since the .OBJ is reconstructed from it, + # and that triggers an alignement bug in the output .PDB. + SectionData: '040000001000081003000000000000000000000000000600011200000000' + Types: + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 3 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 0 + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] +symbols: + - Name: '.debug$T' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 30 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 +... diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h index 3b103c227708a..bb8cc032e28da 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -71,6 +71,11 @@ class GlobalTypeTableBuilder : public TypeCollection { template TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create) { + assert(RecordSize < UINT32_MAX && "Record too big"); + assert(RecordSize % 4 == 0 && + "RecordSize is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); + auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); if (LLVM_UNLIKELY(Result.second /*inserted*/ || diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp index 4d7cd468f3ee6..6924b0e0ca02f 100644 --- a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -90,7 +90,9 @@ static inline ArrayRef stabilize(BumpPtrAllocator &Alloc, TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash, ArrayRef &Record) { assert(Record.size() < UINT32_MAX && "Record too big"); - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + assert(Record.size() % 4 == 0 && + "The type record size is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); LocallyHashedType WeakHash{Hash, Record}; auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex()); diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index f9fca74a2199a..c233db5c1d06a 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -360,16 +360,18 @@ Error TypeStreamMerger::remapType(const CVType &Type) { [this, Type](MutableArrayRef Storage) -> ArrayRef { return remapIndices(Type, Storage); }; + unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); + if (LLVM_LIKELY(UseGlobalHashes)) { GlobalTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; - DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize); + DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); } else { MergingTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; - RemapStorage.resize(Type.RecordData.size()); + RemapStorage.resize(AlignedSize); ArrayRef Result = DoSerialize(RemapStorage); if (!Result.empty()) DestIdx = Dest.insertRecordBytes(Result); @@ -386,9 +388,15 @@ Error TypeStreamMerger::remapType(const CVType &Type) { ArrayRef TypeStreamMerger::remapIndices(const CVType &OriginalType, MutableArrayRef Storage) { + unsigned Align = OriginalType.RecordData.size() & 3; + unsigned AlignedSize = alignTo(OriginalType.RecordData.size(), 4); + assert(Storage.size() == AlignedSize && + "The storage buffer size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + SmallVector Refs; discoverTypeIndices(OriginalType.RecordData, Refs); - if (Refs.empty()) + if (Refs.empty() && Align == 0) return OriginalType.RecordData; ::memcpy(Storage.data(), OriginalType.RecordData.data(), @@ -408,6 +416,16 @@ TypeStreamMerger::remapIndices(const CVType &OriginalType, return {}; } } + + if (Align > 0) { + RecordPrefix *StorageHeader = + reinterpret_cast(Storage.data()); + StorageHeader->RecordLen += 4 - Align; + + DestContent = Storage.data() + OriginalType.RecordData.size(); + for (; Align < 4; ++Align) + *DestContent++ = LF_PAD4 - Align; + } return Storage; } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 4f10f8524a9b1..51a1f0a544e3c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -44,6 +44,9 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { void TpiStreamBuilder::addTypeRecord(ArrayRef Record, Optional Hash) { // If we just crossed an 8KB threshold, add a type index offset. + assert(((Record.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); size_t NewSize = TypeRecordBytes + Record.size(); constexpr size_t EightKB = 8 * 1024; if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { @@ -153,8 +156,11 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; for (auto Rec : TypeRecords) { - assert(!Rec.empty()); // An empty record will not write anything, but it - // would shift all offsets from here on. + assert(!Rec.empty() && "Attempting to write an empty type record shifts " + "all offsets in the TPI stream!"); + assert(((Rec.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); if (auto EC = Writer.writeBytes(Rec)) return EC; }