diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a100d0a76b492..4939230a0643b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -184,6 +184,13 @@ New Compiler Flags - ``-mbranches-within-32B-boundaries`` is added as an x86 assembler mitigation for Intel's Jump Condition Code Erratum. +- -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify + the floating-point exception behavior. The default setting is ``ignore``. + +- -ffp-model={precise,strict,fast} provides the user an umbrella option to + simplify access to the many single purpose floating point options. The default + setting is ``precise``. + Deprecated Compiler Flags ------------------------- diff --git a/lld/test/COFF/pdb-tpi-aligned-records.test b/lld/test/COFF/pdb-tpi-aligned-records.test new file mode 100644 index 0000000000000..cb3e412a8c202 --- /dev/null +++ b/lld/test/COFF/pdb-tpi-aligned-records.test @@ -0,0 +1,46 @@ +# RUN: yaml2obj < %s > %t.obj +# RUN: yaml2obj %p/Inputs/generic.yaml > %t2.obj + +# RUN: lld-link /out:%t.exe /debug /entry:main %t.obj %t2.obj /nodefaultlib +# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck %s +# RUN: lld-link /out:%t.exe /debug:ghash /entry:main %t.obj %t2.obj /nodefaultlib +# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck %s + +# CHECK: 0000: 12000810 03000000 00000000 00000000 0000F2F1 + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: '.debug$T' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + # It is important to keep the 'SectionData' since the .OBJ is reconstructed from it, + # and that triggers an alignement bug in the output .PDB. + SectionData: '040000001000081003000000000000000000000000000600011200000000' + Types: + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 3 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 0 + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] +symbols: + - Name: '.debug$T' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 30 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 +... diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h index 3b103c227708a..bb8cc032e28da 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -71,6 +71,11 @@ class GlobalTypeTableBuilder : public TypeCollection { template TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create) { + assert(RecordSize < UINT32_MAX && "Record too big"); + assert(RecordSize % 4 == 0 && + "RecordSize is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); + auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); if (LLVM_UNLIKELY(Result.second /*inserted*/ || diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp index 4d7cd468f3ee6..6924b0e0ca02f 100644 --- a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -90,7 +90,9 @@ static inline ArrayRef stabilize(BumpPtrAllocator &Alloc, TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash, ArrayRef &Record) { assert(Record.size() < UINT32_MAX && "Record too big"); - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + assert(Record.size() % 4 == 0 && + "The type record size is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); LocallyHashedType WeakHash{Hash, Record}; auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex()); diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index f9fca74a2199a..c233db5c1d06a 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -360,16 +360,18 @@ Error TypeStreamMerger::remapType(const CVType &Type) { [this, Type](MutableArrayRef Storage) -> ArrayRef { return remapIndices(Type, Storage); }; + unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); + if (LLVM_LIKELY(UseGlobalHashes)) { GlobalTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; - DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize); + DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); } else { MergingTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; - RemapStorage.resize(Type.RecordData.size()); + RemapStorage.resize(AlignedSize); ArrayRef Result = DoSerialize(RemapStorage); if (!Result.empty()) DestIdx = Dest.insertRecordBytes(Result); @@ -386,9 +388,15 @@ Error TypeStreamMerger::remapType(const CVType &Type) { ArrayRef TypeStreamMerger::remapIndices(const CVType &OriginalType, MutableArrayRef Storage) { + unsigned Align = OriginalType.RecordData.size() & 3; + unsigned AlignedSize = alignTo(OriginalType.RecordData.size(), 4); + assert(Storage.size() == AlignedSize && + "The storage buffer size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + SmallVector Refs; discoverTypeIndices(OriginalType.RecordData, Refs); - if (Refs.empty()) + if (Refs.empty() && Align == 0) return OriginalType.RecordData; ::memcpy(Storage.data(), OriginalType.RecordData.data(), @@ -408,6 +416,16 @@ TypeStreamMerger::remapIndices(const CVType &OriginalType, return {}; } } + + if (Align > 0) { + RecordPrefix *StorageHeader = + reinterpret_cast(Storage.data()); + StorageHeader->RecordLen += 4 - Align; + + DestContent = Storage.data() + OriginalType.RecordData.size(); + for (; Align < 4; ++Align) + *DestContent++ = LF_PAD4 - Align; + } return Storage; } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 4f10f8524a9b1..51a1f0a544e3c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -44,6 +44,9 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { void TpiStreamBuilder::addTypeRecord(ArrayRef Record, Optional Hash) { // If we just crossed an 8KB threshold, add a type index offset. + assert(((Record.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); size_t NewSize = TypeRecordBytes + Record.size(); constexpr size_t EightKB = 8 * 1024; if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { @@ -153,8 +156,11 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; for (auto Rec : TypeRecords) { - assert(!Rec.empty()); // An empty record will not write anything, but it - // would shift all offsets from here on. + assert(!Rec.empty() && "Attempting to write an empty type record shifts " + "all offsets in the TPI stream!"); + assert(((Rec.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); if (auto EC = Writer.writeBytes(Rec)) return EC; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cbdd7135de438..60eefbc677da5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33998,6 +33998,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef Ops, return SDValue(); // Shuffle the constant bits according to the mask. + SDLoc DL(Root); APInt UndefElts(NumMaskElts, 0); APInt ZeroElts(NumMaskElts, 0); APInt ConstantElts(NumMaskElts, 0); @@ -34035,6 +34036,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef Ops, } assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue()); + // Attempt to create a zero vector. + if ((UndefElts | ZeroElts).isAllOnesValue()) + return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL); + // Create the constant data. MVT MaskSVT; if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64)) @@ -34043,8 +34048,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef Ops, MaskSVT = MVT::getIntegerVT(MaskSizeInBits); MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts); + if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) + return SDValue(); - SDLoc DL(Root); SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL); return DAG.getBitcast(VT, CstOp); } diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 623a8b711ed89..ac53ff33e8362 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -104,6 +104,21 @@ static bool mergeEmptyReturnBlocks(Function &F) { continue; } + // Skip merging if this would result in a CallBr instruction with a + // duplicate destination. FIXME: See note in CodeGenPrepare.cpp. + bool SkipCallBr = false; + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); + PI != E && !SkipCallBr; ++PI) { + if (auto *CBI = dyn_cast((*PI)->getTerminator())) + for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) + if (RetBlock == CBI->getSuccessor(i)) { + SkipCallBr = true; + break; + } + } + if (SkipCallBr) + continue; + // Otherwise, we found a duplicate return block. Merge the two. Changed = true; diff --git a/llvm/test/CodeGen/X86/pr45443.ll b/llvm/test/CodeGen/X86/pr45443.ll new file mode 100644 index 0000000000000..1e40ab94e9ca4 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr45443.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64 + +define <16 x float> @PR45443() { +; CHECK-LABEL: PR45443: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0 +; CHECK-NEXT: ret{{[l|q]}} +bb: + %tmp = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> , <4 x i32> ) + %tmp4 = tail call fast <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> , <16 x float> undef) + %tmp5 = icmp ult <16 x i32> %tmp, + %tmp6 = and <16 x i32> %tmp, + %tmp7 = icmp ne <16 x i32> %tmp6, zeroinitializer + %tmp8 = and <16 x i1> %tmp7, %tmp5 + %tmp9 = select fast <16 x i1> %tmp8, <16 x float> , <16 x float> %tmp4 + ret <16 x float> %tmp9 +} +declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) +declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) diff --git a/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll b/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll new file mode 100644 index 0000000000000..18210e5e0b060 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -simplifycfg -disable-output +; +; Test that SimplifyCFG does not cause CallBr instructions to have duplicate +; destinations, which will cause the verifier to assert. + +define void @fun0() { +entry: + callbr void asm sideeffect "", "X"(i8* blockaddress(@fun0, %bb1)) + to label %bb2 [label %bb1] + +bb1: ; preds = %bb + ret void + +bb2: ; preds = %bb + ret void +} + +define void @fun1() { +entry: + callbr void asm sideeffect "", "X"(i8* blockaddress(@fun1, %bb1)) + to label %bb2 [label %bb1] + +bb2: ; preds = %bb + ret void + +bb1: ; preds = %bb + ret void +}