llvm · rotateright · Mar 4, 2020 · Apr 7, 2020 · Feb 24, 2020 · Mar 13, 2020
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -184,6 +184,13 @@ New Compiler Flags
 - ``-mbranches-within-32B-boundaries`` is added as an x86 assembler mitigation
   for Intel's Jump Condition Code Erratum.
 
+- -ffp-exception-behavior={ignore,maytrap,strict} allows the user to specify
+  the floating-point exception behavior.  The default setting is ``ignore``.
+
+- -ffp-model={precise,strict,fast} provides the user an umbrella option to
+  simplify access to the many single purpose floating point options. The default
+  setting is ``precise``.
+
 Deprecated Compiler Flags
 -------------------------
 

diff --git a/lld/test/COFF/pdb-tpi-aligned-records.test b/lld/test/COFF/pdb-tpi-aligned-records.test
@@ -0,0 +1,46 @@
+# RUN: yaml2obj < %s > %t.obj
+# RUN: yaml2obj %p/Inputs/generic.yaml > %t2.obj
+
+# RUN: lld-link /out:%t.exe /debug /entry:main %t.obj %t2.obj /nodefaultlib
+# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck %s
+# RUN: lld-link /out:%t.exe /debug:ghash /entry:main %t.obj %t2.obj /nodefaultlib
+# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck %s
+
+# CHECK: 0000: 12000810 03000000 00000000 00000000 0000F2F1
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: []
+sections:
+  - Name:            '.debug$T'
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    # It is important to keep the 'SectionData' since the .OBJ is reconstructed from it,
+    # and that triggers an alignement bug in the output .PDB.
+    SectionData:     '040000001000081003000000000000000000000000000600011200000000'
+    Types:
+      - Kind:            LF_PROCEDURE
+        Procedure:
+          ReturnType:      3
+          CallConv:        NearC
+          Options:         [ None ]
+          ParameterCount:  0
+          ArgumentList:    0
+      - Kind:            LF_ARGLIST
+        ArgList:
+          ArgIndices:      [  ]
+symbols:
+  - Name:            '.debug$T'
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          30
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+...
diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -71,6 +71,11 @@ class GlobalTypeTableBuilder : public TypeCollection {
   template <typename CreateFunc>
   TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize,
                            CreateFunc Create) {
+    assert(RecordSize < UINT32_MAX && "Record too big");
+    assert(RecordSize % 4 == 0 &&
+           "RecordSize is not a multiple of 4 bytes which will cause "
+           "misalignment in the output TPI stream!");
+
     auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex());
 
     if (LLVM_UNLIKELY(Result.second /*inserted*/ ||

diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
@@ -90,7 +90,9 @@ static inline ArrayRef<uint8_t> stabilize(BumpPtrAllocator &Alloc,
 TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash,
                                                   ArrayRef<uint8_t> &Record) {
   assert(Record.size() < UINT32_MAX && "Record too big");
-  assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
+  assert(Record.size() % 4 == 0 &&
+         "The type record size is not a multiple of 4 bytes which will cause "
+         "misalignment in the output TPI stream!");
 
   LocallyHashedType WeakHash{Hash, Record};
   auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex());

diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -360,16 +360,18 @@ Error TypeStreamMerger::remapType(const CVType &Type) {
         [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
       return remapIndices(Type, Storage);
     };
+    unsigned AlignedSize = alignTo(Type.RecordData.size(), 4);
+
     if (LLVM_LIKELY(UseGlobalHashes)) {
       GlobalTypeTableBuilder &Dest =
           isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
       GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
-      DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
+      DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize);
     } else {
       MergingTypeTableBuilder &Dest =
           isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
 
-      RemapStorage.resize(Type.RecordData.size());
+      RemapStorage.resize(AlignedSize);
       ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
       if (!Result.empty())
         DestIdx = Dest.insertRecordBytes(Result);
@@ -386,9 +388,15 @@ Error TypeStreamMerger::remapType(const CVType &Type) {
 ArrayRef<uint8_t>
 TypeStreamMerger::remapIndices(const CVType &OriginalType,
                                MutableArrayRef<uint8_t> Storage) {
+  unsigned Align = OriginalType.RecordData.size() & 3;
+  unsigned AlignedSize = alignTo(OriginalType.RecordData.size(), 4);
+  assert(Storage.size() == AlignedSize &&
+         "The storage buffer size is not a multiple of 4 bytes which will "
+         "cause misalignment in the output TPI stream!");
+
   SmallVector<TiReference, 4> Refs;
   discoverTypeIndices(OriginalType.RecordData, Refs);
-  if (Refs.empty())
+  if (Refs.empty() && Align == 0)
     return OriginalType.RecordData;
 
   ::memcpy(Storage.data(), OriginalType.RecordData.data(),
@@ -408,6 +416,16 @@ TypeStreamMerger::remapIndices(const CVType &OriginalType,
         return {};
     }
   }
+
+  if (Align > 0) {
+    RecordPrefix *StorageHeader =
+        reinterpret_cast<RecordPrefix *>(Storage.data());
+    StorageHeader->RecordLen += 4 - Align;
+
+    DestContent = Storage.data() + OriginalType.RecordData.size();
+    for (; Align < 4; ++Align)
+      *DestContent++ = LF_PAD4 - Align;
+  }
   return Storage;
 }
 

diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -44,6 +44,9 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) {
 void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record,
                                      Optional<uint32_t> Hash) {
   // If we just crossed an 8KB threshold, add a type index offset.
+  assert(((Record.size() & 3) == 0) &&
+         "The type record's size is not a multiple of 4 bytes which will "
+         "cause misalignment in the output TPI stream!");
   size_t NewSize = TypeRecordBytes + Record.size();
   constexpr size_t EightKB = 8 * 1024;
   if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) {
@@ -153,8 +156,11 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
     return EC;
 
   for (auto Rec : TypeRecords) {
-    assert(!Rec.empty()); // An empty record will not write anything, but it
-                          // would shift all offsets from here on.
+    assert(!Rec.empty() && "Attempting to write an empty type record shifts "
+                           "all offsets in the TPI stream!");
+    assert(((Rec.size() & 3) == 0) &&
+           "The type record's size is not a multiple of 4 bytes which will "
+           "cause misalignment in the output TPI stream!");
     if (auto EC = Writer.writeBytes(Rec))
       return EC;
   }

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33998,6 +33998,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
     return SDValue();
 
   // Shuffle the constant bits according to the mask.
+  SDLoc DL(Root);
   APInt UndefElts(NumMaskElts, 0);
   APInt ZeroElts(NumMaskElts, 0);
   APInt ConstantElts(NumMaskElts, 0);
@@ -34035,6 +34036,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
   }
   assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
 
+  // Attempt to create a zero vector.
+  if ((UndefElts | ZeroElts).isAllOnesValue())
+    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);
+
   // Create the constant data.
   MVT MaskSVT;
   if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
@@ -34043,8 +34048,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
     MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
 
   MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
+    return SDValue();
 
-  SDLoc DL(Root);
   SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
   return DAG.getBitcast(VT, CstOp);
 }

diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -104,6 +104,21 @@ static bool mergeEmptyReturnBlocks(Function &F) {
       continue;
     }
 
+    // Skip merging if this would result in a CallBr instruction with a
+    // duplicate destination. FIXME: See note in CodeGenPrepare.cpp.
+    bool SkipCallBr = false;
+    for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB);
+         PI != E && !SkipCallBr; ++PI) {
+      if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+        for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
+          if (RetBlock == CBI->getSuccessor(i)) {
+            SkipCallBr = true;
+            break;
+          }
+    }
+    if (SkipCallBr)
+      continue;
+
     // Otherwise, we found a duplicate return block.  Merge the two.
     Changed = true;
 

diff --git a/llvm/test/CodeGen/X86/pr45443.ll b/llvm/test/CodeGen/X86/pr45443.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64
+
+define <16 x float> @PR45443() {
+; CHECK-LABEL: PR45443:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+bb:
+  %tmp = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> <i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040>, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>)
+  %tmp4 = tail call fast <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> <float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000>, <16 x float> undef)
+  %tmp5 = icmp ult <16 x i32> %tmp, <i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216>
+  %tmp6 = and <16 x i32> %tmp, <i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215>
+  %tmp7 = icmp ne <16 x i32> %tmp6, zeroinitializer
+  %tmp8 = and <16 x i1> %tmp7, %tmp5
+  %tmp9 = select fast <16 x i1> %tmp8, <16 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, <16 x float> %tmp4
+  ret <16 x float> %tmp9
+}
+declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
+declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>)
diff --git a/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll b/llvm/test/Transforms/SimplifyCFG/callbr-destinations.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simplifycfg -disable-output
+;
+; Test that SimplifyCFG does not cause CallBr instructions to have duplicate
+; destinations, which will cause the verifier to assert.
+
+define void @fun0() {
+entry:
+  callbr void asm sideeffect "", "X"(i8* blockaddress(@fun0, %bb1))
+          to label %bb2 [label %bb1]
+
+bb1:                                              ; preds = %bb
+  ret void
+
+bb2:                                             ; preds = %bb
+  ret void
+}
+
+define void @fun1() {
+entry:
+  callbr void asm sideeffect "", "X"(i8* blockaddress(@fun1, %bb1))
+          to label %bb2 [label %bb1]
+
+bb2:                                             ; preds = %bb
+  ret void
+
+bb1:                                              ; preds = %bb
+  ret void
+}