Skip to content

[DAG] isSplatValue - incorrect handling of undef element masks through binop #134602

@mark-sed

Description

@mark-sed

While investigating a miscompile, I extracted the function below. It had already been processed by LLVM's loop vectorization, and when run with lli it returns 178.
After running it through LLVM opt with just -passes=simplifycfg, which only merges the function into a single basic block, the function returns 0.

Reproducer

target triple = "x86_64-unknown-linux-gnu"

; Module-private i16 cell, initially 0. @test loads it on entry and stores its
; computed result back into it before returning.
@val = internal global i16 0, align 2

; Reproducer body (already loop-vectorized). Computes an i16 sum from the
; truncated argument and the current value of @val using a 16-lane main vector
; block followed by a 4-lane epilogue block, stores the i16 result back to
; @val, and returns it sign-extended to i32.
; According to the report, running this form under lli yields 178.
define noundef i32 @test(i32 noundef %0) {
gc.safepoint_poll.exit:                           ; entry block (no predecessors in this reduced function)
  ; Scalar inputs: the previous @val, and two values derived from the argument.
  %.promoted3 = load atomic i16, ptr @val unordered, align 2
  %3 = trunc i32 %0 to i16
  %4 = sub i16 83, %3
  %reass.sub = sub i16 86, %3
  ; Broadcast %4 and %reass.sub to all 16 lanes (insert into lane 0, then shuffle with an all-zero mask).
  %broadcast.splatinsert = insertelement <16 x i16> poison, i16 %4, i64 0
  %broadcast.splat = shufflevector <16 x i16> %broadcast.splatinsert, <16 x i16> poison, <16 x i32> zeroinitializer
  %broadcast.splatinsert7 = insertelement <16 x i16> poison, i16 %reass.sub, i64 0
  %broadcast.splat8 = shufflevector <16 x i16> %broadcast.splatinsert7, <16 x i16> poison, <16 x i32> zeroinitializer
  ; Initial accumulator: lane 0 = loaded @val (overwrites the poison lane), lanes 1..15 = 0.
  %5 = insertelement <16 x i16> <i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %.promoted3, i64 0
  br label %vector.body

vector.body:                                      ; preds = %gc.safepoint_poll.exit
  ; Five straight-line rounds, each updating the accumulator as
  ;   acc = (splat(%reass.sub) + acc) + splat(%4) * <per-lane constants> + <per-lane constants>
  %6 = mul <16 x i16> %broadcast.splat, <i16 28, i16 27, i16 27, i16 27, i16 26, i16 26, i16 26, i16 25, i16 25, i16 25, i16 24, i16 24, i16 24, i16 23, i16 23, i16 23>
  %7 = add <16 x i16> %broadcast.splat8, %5
  %8 = add <16 x i16> %7, %6
  %9 = add <16 x i16> %8, <i16 -1134, i16 -1053, i16 -1053, i16 -1053, i16 -975, i16 -975, i16 -975, i16 -900, i16 -900, i16 -900, i16 -828, i16 -828, i16 -828, i16 -759, i16 -759, i16 -759>
  %10 = mul <16 x i16> %broadcast.splat, <i16 22, i16 22, i16 22, i16 21, i16 21, i16 21, i16 20, i16 20, i16 20, i16 19, i16 19, i16 19, i16 18, i16 18, i16 18, i16 17>
  %11 = add <16 x i16> %broadcast.splat8, %9
  %12 = add <16 x i16> %11, %10
  %13 = add <16 x i16> %12, <i16 -693, i16 -693, i16 -693, i16 -630, i16 -630, i16 -630, i16 -570, i16 -570, i16 -570, i16 -513, i16 -513, i16 -513, i16 -459, i16 -459, i16 -459, i16 -408>
  %14 = mul <16 x i16> %broadcast.splat, <i16 17, i16 17, i16 16, i16 16, i16 16, i16 15, i16 15, i16 15, i16 14, i16 14, i16 14, i16 13, i16 13, i16 13, i16 12, i16 12>
  %15 = add <16 x i16> %broadcast.splat8, %13
  %16 = add <16 x i16> %15, %14
  %17 = add <16 x i16> %16, <i16 -408, i16 -408, i16 -360, i16 -360, i16 -360, i16 -315, i16 -315, i16 -315, i16 -273, i16 -273, i16 -273, i16 -234, i16 -234, i16 -234, i16 -198, i16 -198>
  %18 = mul <16 x i16> %broadcast.splat, <i16 12, i16 11, i16 11, i16 11, i16 10, i16 10, i16 10, i16 9, i16 9, i16 9, i16 8, i16 8, i16 8, i16 7, i16 7, i16 7>
  %19 = add <16 x i16> %broadcast.splat8, %17
  %20 = add <16 x i16> %19, %18
  %21 = add <16 x i16> %20, <i16 -198, i16 -165, i16 -165, i16 -165, i16 -135, i16 -135, i16 -135, i16 -108, i16 -108, i16 -108, i16 -84, i16 -84, i16 -84, i16 -63, i16 -63, i16 -63>
  %22 = mul <16 x i16> %broadcast.splat, <i16 6, i16 6, i16 6, i16 5, i16 5, i16 5, i16 4, i16 4, i16 4, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 1>
  %23 = add <16 x i16> %broadcast.splat8, %21
  %24 = add <16 x i16> %23, %22
  %25 = add <16 x i16> %24, <i16 -45, i16 -45, i16 -45, i16 -30, i16 -30, i16 -30, i16 -18, i16 -18, i16 -18, i16 -9, i16 -9, i16 -9, i16 -3, i16 -3, i16 -3, i16 0>
  ; Collapse the 16 lanes into one scalar partial sum.
  %26 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %25)
  ; Set up the 4-lane epilogue: the same two scalars broadcast to 4 lanes.
  %broadcast.splatinsert9 = insertelement <4 x i16> poison, i16 %4, i64 0
  %broadcast.splat10 = shufflevector <4 x i16> %broadcast.splatinsert9, <4 x i16> poison, <4 x i32> zeroinitializer
  %broadcast.splatinsert11 = insertelement <4 x i16> poison, i16 %reass.sub, i64 0
  %broadcast.splat12 = shufflevector <4 x i16> %broadcast.splatinsert11, <4 x i16> poison, <4 x i32> zeroinitializer
  ; Epilogue accumulator: lane 0 = partial sum from the main block, lanes 1..3 = 0.
  %27 = insertelement <4 x i16> <i16 poison, i16 0, i16 0, i16 0>, i16 %26, i64 0
  br label %vec.epilog.vector.body

vec.epilog.vector.body:                           ; preds = %vector.body
  ; One 4-lane round: splat(%4) * <1,1,0,0> added to (splat(%reass.sub) + acc), then reduced.
  %28 = mul <4 x i16> %broadcast.splat10, <i16 1, i16 1, i16 0, i16 0>
  %29 = add <4 x i16> %broadcast.splat12, %27
  %30 = add <4 x i16> %29, %28
  %31 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %30)
  br label %bci_11.preheader

bci_11.preheader:                                 ; preds = %vec.epilog.vector.body
  ; Final scalar step: add %reass.sub once more, publish to @val, return sign-extended.
  %32 = add i16 %reass.sub, %31
  store atomic i16 %32, ptr @val unordered, align 2
  %ret_32 = sext i16 %32 to i32
  ret i32 %ret_32
}

; Program entry point used when the module is run under lli: invokes @test
; with the constant argument 0 and returns its result unchanged.
define i32 @main() {
entry:
  %result = call i32 @test(i32 0)
  ret i32 %result
}


; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #0

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

Runner script:

# Build the simplifycfg-only variant of the reproducer, then execute both
# modules with lli and print the exit status each run produced.
opt -S -passes=simplifycfg test.ll > test_simplify.ll

lli test.ll
original_status=$?
echo "           test.ll returned: " $original_status

lli test_simplify.ll
simplified_status=$?
echo "simplified test.ll returned: " $simplified_status

I can see the following output:

           test.ll returned:  178
simplified test.ll returned:  0

And here is the test_simplify.ll:

; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Module-private i16 cell, initially 0. @test loads it on entry and stores its
; computed result back into it before returning.
@val = internal global i16 0, align 2

; Single-basic-block form of @test as emitted by `opt -passes=simplifycfg`.
; Same instruction sequence as the input with the blocks merged and unnamed
; values renumbered. According to the report, running this form under lli
; yields 0 instead of 178.
define noundef i32 @test(i32 noundef %0) {
gc.safepoint_poll.exit:
  ; Scalar inputs: the previous @val, and two values derived from the argument.
  %.promoted3 = load atomic i16, ptr @val unordered, align 2
  %1 = trunc i32 %0 to i16
  %2 = sub i16 83, %1
  %reass.sub = sub i16 86, %1
  ; Broadcast %2 and %reass.sub to all 16 lanes.
  %broadcast.splatinsert = insertelement <16 x i16> poison, i16 %2, i64 0
  %broadcast.splat = shufflevector <16 x i16> %broadcast.splatinsert, <16 x i16> poison, <16 x i32> zeroinitializer
  %broadcast.splatinsert7 = insertelement <16 x i16> poison, i16 %reass.sub, i64 0
  %broadcast.splat8 = shufflevector <16 x i16> %broadcast.splatinsert7, <16 x i16> poison, <16 x i32> zeroinitializer
  ; Initial accumulator: lane 0 = loaded @val (overwrites the poison lane), lanes 1..15 = 0.
  %3 = insertelement <16 x i16> <i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %.promoted3, i64 0
  ; Five rounds of: acc = (splat(%reass.sub) + acc) + splat(%2) * <constants> + <constants>
  %4 = mul <16 x i16> %broadcast.splat, <i16 28, i16 27, i16 27, i16 27, i16 26, i16 26, i16 26, i16 25, i16 25, i16 25, i16 24, i16 24, i16 24, i16 23, i16 23, i16 23>
  %5 = add <16 x i16> %broadcast.splat8, %3
  %6 = add <16 x i16> %5, %4
  %7 = add <16 x i16> %6, <i16 -1134, i16 -1053, i16 -1053, i16 -1053, i16 -975, i16 -975, i16 -975, i16 -900, i16 -900, i16 -900, i16 -828, i16 -828, i16 -828, i16 -759, i16 -759, i16 -759>
  %8 = mul <16 x i16> %broadcast.splat, <i16 22, i16 22, i16 22, i16 21, i16 21, i16 21, i16 20, i16 20, i16 20, i16 19, i16 19, i16 19, i16 18, i16 18, i16 18, i16 17>
  %9 = add <16 x i16> %broadcast.splat8, %7
  %10 = add <16 x i16> %9, %8
  %11 = add <16 x i16> %10, <i16 -693, i16 -693, i16 -693, i16 -630, i16 -630, i16 -630, i16 -570, i16 -570, i16 -570, i16 -513, i16 -513, i16 -513, i16 -459, i16 -459, i16 -459, i16 -408>
  %12 = mul <16 x i16> %broadcast.splat, <i16 17, i16 17, i16 16, i16 16, i16 16, i16 15, i16 15, i16 15, i16 14, i16 14, i16 14, i16 13, i16 13, i16 13, i16 12, i16 12>
  %13 = add <16 x i16> %broadcast.splat8, %11
  %14 = add <16 x i16> %13, %12
  %15 = add <16 x i16> %14, <i16 -408, i16 -408, i16 -360, i16 -360, i16 -360, i16 -315, i16 -315, i16 -315, i16 -273, i16 -273, i16 -273, i16 -234, i16 -234, i16 -234, i16 -198, i16 -198>
  %16 = mul <16 x i16> %broadcast.splat, <i16 12, i16 11, i16 11, i16 11, i16 10, i16 10, i16 10, i16 9, i16 9, i16 9, i16 8, i16 8, i16 8, i16 7, i16 7, i16 7>
  %17 = add <16 x i16> %broadcast.splat8, %15
  %18 = add <16 x i16> %17, %16
  %19 = add <16 x i16> %18, <i16 -198, i16 -165, i16 -165, i16 -165, i16 -135, i16 -135, i16 -135, i16 -108, i16 -108, i16 -108, i16 -84, i16 -84, i16 -84, i16 -63, i16 -63, i16 -63>
  %20 = mul <16 x i16> %broadcast.splat, <i16 6, i16 6, i16 6, i16 5, i16 5, i16 5, i16 4, i16 4, i16 4, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 1>
  %21 = add <16 x i16> %broadcast.splat8, %19
  %22 = add <16 x i16> %21, %20
  %23 = add <16 x i16> %22, <i16 -45, i16 -45, i16 -45, i16 -30, i16 -30, i16 -30, i16 -18, i16 -18, i16 -18, i16 -9, i16 -9, i16 -9, i16 -3, i16 -3, i16 -3, i16 0>
  ; Collapse the 16 lanes into one scalar partial sum.
  %24 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %23)
  ; 4-lane epilogue: broadcast the same two scalars to 4 lanes.
  %broadcast.splatinsert9 = insertelement <4 x i16> poison, i16 %2, i64 0
  %broadcast.splat10 = shufflevector <4 x i16> %broadcast.splatinsert9, <4 x i16> poison, <4 x i32> zeroinitializer
  %broadcast.splatinsert11 = insertelement <4 x i16> poison, i16 %reass.sub, i64 0
  %broadcast.splat12 = shufflevector <4 x i16> %broadcast.splatinsert11, <4 x i16> poison, <4 x i32> zeroinitializer
  ; Epilogue accumulator: lane 0 = partial sum from the 16-lane part, lanes 1..3 = 0.
  %25 = insertelement <4 x i16> <i16 poison, i16 0, i16 0, i16 0>, i16 %24, i64 0
  %26 = mul <4 x i16> %broadcast.splat10, <i16 1, i16 1, i16 0, i16 0>
  %27 = add <4 x i16> %broadcast.splat12, %25
  %28 = add <4 x i16> %27, %26
  %29 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %28)
  ; Final scalar step: add %reass.sub once more, publish to @val, return sign-extended.
  %30 = add i16 %reass.sub, %29
  store atomic i16 %30, ptr @val unordered, align 2
  %ret_32 = sext i16 %30 to i32
  ret i32 %ret_32
}

; Program entry point used when the module is run under lli: calls @test with
; the constant argument 0 and returns its result unchanged.
define i32 @main() {
  %rval = call i32 @test(i32 0)
  ret i32 %rval
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #0

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

Metadata

Metadata

Assignees

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions