From 4d47f0b42f5112e111c93395488b0afa7d5027e5 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 31 Oct 2024 20:33:16 +0000 Subject: [PATCH 1/5] [msan] Add test for Arm NEON tbl intrinsics Arm NEON tbl intrinsics are currently not instrumented properly by MSan. This test will help track progress. --- .../MemorySanitizer/AArch64/neon_tbl.ll | 509 ++++++++++++++++++ 1 file changed, 509 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll new file mode 100644 index 0000000000000..42f79fbf02748 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll @@ -0,0 +1,509 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2 +; Test memory sanitizer instrumentation for Arm NEON tbl instructions. +; +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +; ----------------------------------------------------------------------------------------------------------------------------------------------- + +define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { +; CHECK-LABEL: define <8 x i8> @tbl1_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) + ret <8 x i8> %out +} + +define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { +; CHECK-LABEL: define <16 x i8> @tbl1_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) + ret <16 x i8> %out +} + +define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { +; CHECK-LABEL: define <8 x i8> @tbl2_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) + ret <8 x i8> %out +} + +define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: define <16 x i8> @tbl2_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) + ret <16 x i8> %out +} + +define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +; CHECK-LABEL: define <8 x i8> @tbl3_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) + ret <8 x i8> %out +} + +define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +; CHECK-LABEL: define <16 x i8> @tbl3_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) + ret <16 x i8> %out +} + +define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +; CHECK-LABEL: define <8 x i8> @tbl4_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) + ret <8 x i8> %out +} + +define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +; CHECK-LABEL: define <16 x i8> @tbl4_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) + ret <16 x i8> %out +} + + + +define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[S]] +; + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> ) + %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> + ret <8 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + +declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + +define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { +; CHECK-LABEL: define <8 x i8> @tbx1_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) + ret <8 x i8> %out +} + +define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { +; CHECK-LABEL: define <16 x i8> @tbx1_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) + ret <16 x i8> %out +} + +define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +; CHECK-LABEL: define <8 x i8> @tbx2_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) + ret <8 x i8> %out +} + +define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +; CHECK-LABEL: define <16 x i8> @tbx2_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) + ret <16 x i8> %out +} + +define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +; CHECK-LABEL: define <8 x i8> @tbx3_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) + ret <8 x i8> %out +} + +define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +; CHECK-LABEL: define <16 x i8> @tbx3_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) + ret <16 x i8> %out +} + +define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { +; CHECK-LABEL: define <8 x i8> @tbx4_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) + ret <8 x i8> %out +} + +define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { +; CHECK-LABEL: define <16 x i8> @tbx4_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) + ret <16 x i8> %out +} + +declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone From 1cb3f14fa11c217b3b6cddcb3a94d42a73d69daf Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 31 Oct 2024 22:34:42 +0000 Subject: [PATCH 2/5] Add origins-tracking test --- .../AArch64/neon_tbl_origins.ll | 534 ++++++++++++++++++ 1 file changed, 534 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll new file mode 100644 index 0000000000000..44c0a278de96f --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll @@ -0,0 +1,534 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2 +; Test memory sanitizer instrumentation for Arm NEON tbl instructions. +; +; RUN: build/bin/opt < %s -passes=msan -msan-track-origins=2 -S | FileCheck %s +; +; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +; ----------------------------------------------------------------------------------------------------------------------------------------------- + +define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { +; CHECK-LABEL: define <8 x i8> @tbl1_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) + ret <8 x i8> %out +} + +define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { +; CHECK-LABEL: define <16 x i8> @tbl1_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) + ret <16 x i8> %out +} + +define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { +; CHECK-LABEL: define <8 x i8> @tbl2_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) + ret <8 x i8> %out +} + +define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { +; CHECK-LABEL: define <16 x i8> @tbl2_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) + ret <16 x i8> %out +} + +define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +; CHECK-LABEL: define <8 x i8> @tbl3_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) + ret <8 x i8> %out +} + +define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +; CHECK-LABEL: define <16 x i8> @tbl3_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) + ret <16 x i8> %out +} + +define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +; CHECK-LABEL: define <8 x i8> @tbl4_8b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) + ret <8 x i8> %out +} + +define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +; CHECK-LABEL: define <16 x i8> @tbl4_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) + ret <16 x i8> %out +} + + + +define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[S]] +; + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> ) + %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> + ret <8 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14 +; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 + %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 + %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 + %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 + %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 + %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 + %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 + %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 + %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 + %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 + %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 + %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 + %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 + %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 + %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14 + %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + + + +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2 +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[S]] +; + %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) + %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) + %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> + ret <16 x i8> %s +} + +declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + +define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { +; CHECK-LABEL: define <8 x i8> @tbx1_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) + ret <8 x i8> %out +} + +define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { +; CHECK-LABEL: define <16 x i8> @tbx1_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) + ret <16 x i8> %out +} + +define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +; CHECK-LABEL: define <8 x i8> @tbx2_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) + ret <8 x i8> %out +} + +define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +; CHECK-LABEL: define <16 x i8> @tbx2_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) + ret <16 x i8> %out +} + +define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +; CHECK-LABEL: define <8 x i8> @tbx3_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) + ret <8 x i8> %out +} + +define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +; CHECK-LABEL: define <16 x i8> @tbx3_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) + ret <16 x i8> %out +} + +define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { +; CHECK-LABEL: define <8 x i8> @tbx4_8b +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <8 x i8> [[OUT]] +; + %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) + ret <8 x i8> %out +} + +define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { +; CHECK-LABEL: define <16 x i8> @tbx4_16b +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]]) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: ret <16 x i8> [[OUT]] +; + %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) + ret <16 x i8> %out +} + +declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone From 516d4aa80d9cc275906d08679c9ca238f5b3e938 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 31 Oct 2024 22:36:30 +0000 Subject: [PATCH 3/5] Fix tool name --- .../Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll index 44c0a278de96f..bb22277ef4b05 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2 ; Test memory sanitizer instrumentation for Arm NEON tbl instructions. ; -; RUN: build/bin/opt < %s -passes=msan -msan-track-origins=2 -S | FileCheck %s +; RUN: opt < %s -passes=msan -msan-track-origins=2 -S | FileCheck %s ; ; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll From e7cd4721c66e607890dab98fb86299cb75752408 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 31 Oct 2024 22:57:17 +0000 Subject: [PATCH 4/5] Add 'sanitize_memory' annotations to tests --- .../MemorySanitizer/AArch64/neon_tbl.ll | 524 ++++++++-- .../AArch64/neon_tbl_origins.ll | 978 ++++++++++++++++-- 2 files changed, 1330 insertions(+), 172 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll index 42f79fbf02748..2505a3e95fe02 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll @@ -10,10 +10,22 @@ target triple = "aarch64--linux-android9001" ; ----------------------------------------------------------------------------------------------------------------------------------------------- -define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { +define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl1_8b ; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 6: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -22,22 +34,41 @@ define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { ret <8 x i8> %out } -define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { +define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl1_16b ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) ret <16 x i8> %out } -define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { +define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl2_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 8: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -46,22 +77,47 @@ define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { ret <8 x i8> %out } -define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { +define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl2_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %out } -define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl3_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -70,22 +126,53 @@ define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %out } -define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl3_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP4]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %out } -define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl4_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -94,12 +181,21 @@ define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ret <8 x i8> %out } -define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl4_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP5]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) @@ -108,11 +204,35 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> % -define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 8: ; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> ) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: ; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> ) ; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 @@ -126,14 +246,23 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 -define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -143,30 +272,56 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c } -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 ; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 ; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 ; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 ; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 ; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 ; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 ; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], [[_MSPROP15]] ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 @@ -192,9 +347,14 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x } -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1 @@ -208,14 +368,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 0, i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 15 ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <16 x i8> [[_MSPROP4]], [[_MSPROP3]] ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP6:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = or <16 x i8> [[_MSPROP6]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP8:%.*]] = shufflevector <16 x i8> [[_MSPROP5]], <16 x i8> [[_MSPROP7]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 @@ -242,30 +411,56 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 ; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 ; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 ; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 ; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 ; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 ; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 ; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], [[_MSPROP15]] ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 @@ -292,30 +487,56 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 ; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 ; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 ; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 ; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 ; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 ; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 ; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 [[TMP1]], i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 [[TMP1]], i32 15 ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], [[_MSPROP15]] ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 @@ -342,14 +563,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -360,14 +590,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -378,14 +617,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -394,19 +642,35 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> ret <16 x i8> %s } -declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { +define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx1_8b ; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 8: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -415,22 +679,47 @@ define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { ret <8 x i8> %out } -define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { +define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx1_16b ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %out } -define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx2_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -439,22 +728,53 @@ define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { ret <8 x i8> %out } -define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx2_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP4]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %out } -define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx3_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -463,22 +783,59 @@ define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ret <8 x i8> %out } -define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx3_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP5]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %out } -define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { +define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx4_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) { +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to i64 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 14: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[OUT]] @@ -487,23 +844,34 @@ define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ret <8 x i8> %out } -define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { +define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx4_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[_MSPROP3]], [[TMP6]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %out } -declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll index bb22277ef4b05..034437a8d7beb 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll @@ -10,10 +10,28 @@ target triple = "aarch64--linux-android9001" ; ----------------------------------------------------------------------------------------------------------------------------------------------- -define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { +define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl1_8b ; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -23,23 +41,58 @@ define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { ret <8 x i8> %out } -define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { +define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl1_16b ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP4]], i32 [[TMP2]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP7]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) ret <16 x i8> %out } -define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { +define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl2_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 15: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -49,23 +102,73 @@ define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { ret <8 x i8> %out } -define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { +define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl2_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP6]], i32 [[TMP9]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP12]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %out } -define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl3_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 20: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -75,23 +178,88 @@ define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %out } -define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl3_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP6]], i32 [[TMP11]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP8]], i32 [[TMP14]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %out } -define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbl4_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 19: +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] +; CHECK: 21: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 22: +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP9]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP23]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP10]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 25: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -101,13 +269,39 @@ define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ret <8 x i8> %out } -define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbl4_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP6]], i32 [[TMP13]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP8]], i32 [[TMP16]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP10]], i32 [[TMP19]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP22]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) @@ -116,11 +310,47 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> % -define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 14: ; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> ) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 20: ; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> ) ; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 @@ -135,15 +365,37 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 -define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -153,31 +405,90 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c } -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] ; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP2]], i32 [[TMP16]] ; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 [[TMP18]] ; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP2]], i32 [[TMP20]] ; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP2]], i32 [[TMP22]] ; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP2]], i32 [[TMP24]] ; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 ; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i128 [[TMP27]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP6]], i32 [[TMP4]] +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], [[_MSPROP15]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[_MSPROP15]] to i128 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i128 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP26]], i32 [[TMP29]] ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i128 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP10]], i32 [[TMP8]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x i8> [[_MSPROP19]] to i128 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i128 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP35]], i32 [[TMP32]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP38]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 @@ -203,9 +514,19 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x } -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1 @@ -219,15 +540,42 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 13 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 0, i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 15 +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[TMP4]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <16 x i8> [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP16]], i32 [[TMP19]] ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP6:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i128 [[TMP23]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP10]], i32 [[TMP8]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = or <16 x i8> [[_MSPROP6]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP8:%.*]] = shufflevector <16 x i8> [[_MSPROP5]], <16 x i8> [[_MSPROP7]], <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i8> [[_MSPROP7]] to i128 +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i128 [[TMP26]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP25]], i32 [[TMP22]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP28]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 @@ -254,31 +602,90 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] ; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP2]], i32 [[TMP16]] ; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 [[TMP18]] ; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP2]], i32 [[TMP20]] ; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP2]], i32 [[TMP22]] ; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP2]], i32 [[TMP24]] ; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 ; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 +; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i128 [[TMP27]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP6]], i32 [[TMP4]] +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i128 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP10]], i32 [[TMP8]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], [[_MSPROP15]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i8> [[_MSPROP15]] to i128 +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i128 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP26]], i32 [[TMP32]] ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x i8> [[_MSPROP19]] to i128 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i128 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP35]], i32 [[TMP29]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP38]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 @@ -305,31 +712,94 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { +define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] ; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP2]], i32 [[TMP16]] ; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 [[TMP18]] ; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP2]], i32 [[TMP20]] ; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP2]], i32 [[TMP22]] ; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP2]], i32 [[TMP24]] ; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 ; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 ; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 ; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 ; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 ; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 ; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 [[TMP1]], i32 14 +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP2]], i32 [[TMP26]] ; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 [[TMP1]], i32 15 +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP2]], i32 [[TMP28]] ; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 +; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i128 [[TMP31]], 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP6]], i32 [[TMP4]] +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i128 [[TMP34]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP10]], i32 [[TMP8]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], [[_MSPROP15]] +; CHECK-NEXT: [[TMP37:%.*]] = bitcast <16 x i8> [[_MSPROP15]] to i128 +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i128 [[TMP37]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP36]] ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) +; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i8> [[_MSPROP19]] to i128 +; CHECK-NEXT: [[TMP41:%.*]] = icmp ne i128 [[TMP40]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 [[TMP33]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP42]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 @@ -356,15 +826,37 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -375,15 +867,37 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -394,15 +908,37 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { +define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer ; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer ; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) +; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[S]] ; %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) @@ -411,19 +947,46 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> ret <16 x i8> %s } -declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { +define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx1_8b ; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 15: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -433,23 +996,73 @@ define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { ret <8 x i8> %out } -define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { +define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx1_16b ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP6]], i32 [[TMP9]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP12]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %out } -define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { +define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx2_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) { +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 20: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -459,23 +1072,88 @@ define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { ret <8 x i8> %out } -define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { +define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx2_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP6]], i32 [[TMP11]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP8]], i32 [[TMP14]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %out } -define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { +define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx3_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) { +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 56) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 19: +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] +; CHECK: 21: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 22: +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP9]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP23]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP10]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 25: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -485,23 +1163,103 @@ define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ret <8 x i8> %out } -define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { +define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx3_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP6]], i32 [[TMP13]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP8]], i32 [[TMP16]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP10]], i32 [[TMP19]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP22]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %out } -define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { +define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) sanitize_memory { ; CHECK-LABEL: define <8 x i8> @tbx4_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) { +; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 56) to ptr), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 72) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]] +; CHECK: 20: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 21: +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP22]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] +; CHECK: 23: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 24: +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] +; CHECK: 26: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP10]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP28]], 0 +; CHECK-NEXT: br i1 [[_MSCMP5]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] +; CHECK: 29: +; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP12]]) #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 30: ; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]]) ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 @@ -511,24 +1269,56 @@ define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ret <8 x i8> %out } -define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { +define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) sanitize_memory { ; CHECK-LABEL: define <16 x i8> @tbx4_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) { +; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 80) to ptr), align 4 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP4]], i32 [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP6]], i32 [[TMP15]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[TMP18]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP9]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i128 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP10]], i32 [[TMP21]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[_MSPROP3]], [[TMP11]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i8> [[TMP11]] to i128 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i128 [[TMP25]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP12]], i32 [[TMP24]] ; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]]) -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 +; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 [[TMP27]], ptr @__msan_retval_origin_tls, align 4 ; CHECK-NEXT: ret <16 x i8> [[OUT]] ; %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %out } -declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone +declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone +declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone From 52d9058cd9f472fb3945ca08731610ac39e15711 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 31 Oct 2024 23:43:49 +0000 Subject: [PATCH 5/5] Remove origins test --- .../AArch64/neon_tbl_origins.ll | 1324 ----------------- 1 file changed, 1324 deletions(-) delete mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll deleted file mode 100644 index 034437a8d7beb..0000000000000 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll +++ /dev/null @@ -1,1324 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2 -; Test memory sanitizer instrumentation for Arm NEON tbl instructions. -; -; RUN: opt < %s -passes=msan -msan-track-origins=2 -S | FileCheck %s -; -; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-android9001" - -; ----------------------------------------------------------------------------------------------------------------------------------------------- - -define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbl1_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) - ret <8 x i8> %out -} - -define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbl1_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP7]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) - ret <16 x i8> %out -} - -define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbl2_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 12: -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP13]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] -; CHECK: 14: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 15: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) - ret <8 x i8> %out -} - -define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbl2_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP6]], i32 [[TMP9]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP12]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) - ret <16 x i8> %out -} - -define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbl3_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] -; CHECK: 16: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 17: -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to i64 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP18]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] -; CHECK: 19: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 20: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) - ret <8 x i8> %out -} - -define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbl3_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP6]], i32 [[TMP11]] -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP8]], i32 [[TMP14]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) - ret <16 x i8> %out -} - -define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbl4_8b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 13: -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP14]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] -; CHECK: 15: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 16: -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP17]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] -; CHECK: 18: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 19: -; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP20]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] -; CHECK: 21: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 22: -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP9]] to i64 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP23]], 0 -; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] -; CHECK: 24: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP10]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 25: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) - ret <8 x i8> %out -} - -define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbl4_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP6]], i32 [[TMP13]] -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i128 [[TMP17]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP8]], i32 [[TMP16]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP9]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i128 [[TMP20]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP10]], i32 [[TMP19]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP22]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) - ret <16 x i8> %out -} - - - -define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> ) -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] -; CHECK: 16: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 17: -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP18]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] -; CHECK: 19: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 20: -; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> ) -; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[S]] -; - %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) - %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> ) - %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> - ret <8 x i8> %s -} - - - -define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 -; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] -; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 -; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] -; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 -; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP2]], i32 [[TMP16]] -; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 -; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 [[TMP18]] -; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 -; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP2]], i32 [[TMP20]] -; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 -; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP2]], i32 [[TMP22]] -; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 -; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP2]], i32 [[TMP24]] -; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 -; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 -; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 -; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 -; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 -; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 -; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 -; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 -; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 -; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 -; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 -; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 -; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 -; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 -; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 -; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 -; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 -; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i128 [[TMP27]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP6]], i32 [[TMP4]] -; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], [[_MSPROP15]] -; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[_MSPROP15]] to i128 -; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i128 [[TMP30]], 0 -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP26]], i32 [[TMP29]] -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) -; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i128 [[TMP33]], 0 -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP10]], i32 [[TMP8]] -; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], zeroinitializer -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x i8> [[_MSPROP19]] to i128 -; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i128 [[TMP36]], 0 -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP35]], i32 [[TMP32]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP38]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 - %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 - %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 - %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 - %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 - %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 - %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 - %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 - %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 - %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 - %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 - %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 - %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 - %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 - %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 - %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0 -; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1 -; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2 -; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3 -; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4 -; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5 -; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6 -; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7 -; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 -; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 -; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 -; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 12 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12 -; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 13 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] -; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13 -; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 0, i32 14 -; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 -; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 15 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] -; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 -; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i128 [[TMP17]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[TMP4]] -; CHECK-NEXT: [[_MSPROP5:%.*]] = or <16 x i8> [[_MSPROP4]], [[_MSPROP3]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i128 [[TMP20]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP16]], i32 [[TMP19]] -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) -; CHECK-NEXT: [[_MSPROP6:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i128 [[TMP23]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP10]], i32 [[TMP8]] -; CHECK-NEXT: [[_MSPROP7:%.*]] = or <16 x i8> [[_MSPROP6]], zeroinitializer -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP8:%.*]] = shufflevector <16 x i8> [[_MSPROP5]], <16 x i8> [[_MSPROP7]], <16 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i8> [[_MSPROP7]] to i128 -; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i128 [[TMP26]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP25]], i32 [[TMP22]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP28]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 - %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1 - %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2 - %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3 - %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4 - %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5 - %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6 - %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7 - %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 - %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 - %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 - %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 - %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12 - %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13 - %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 - %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - - -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 -; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] -; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 -; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] -; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 -; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP2]], i32 [[TMP16]] -; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 -; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 [[TMP18]] -; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 -; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP2]], i32 [[TMP20]] -; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 -; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP2]], i32 [[TMP22]] -; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 -; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP2]], i32 [[TMP24]] -; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 -; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 -; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 -; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 -; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 -; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 -; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 -; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 -; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 -; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 -; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 -; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 -; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 -; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 -; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 -; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 -; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 -; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i128 [[TMP27]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP6]], i32 [[TMP4]] -; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], zeroinitializer -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i128 [[TMP30]], 0 -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP10]], i32 [[TMP8]] -; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], [[_MSPROP15]] -; CHECK-NEXT: [[TMP33:%.*]] = bitcast <16 x i8> [[_MSPROP15]] to i128 -; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i128 [[TMP33]], 0 -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP26]], i32 [[TMP32]] -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) -; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x i8> [[_MSPROP19]] to i128 -; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i128 [[TMP36]], 0 -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP35]], i32 [[TMP29]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP38]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 - %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 - %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 - %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 - %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 - %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 - %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 - %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 - %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 - %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 - %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 - %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 - %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 - %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 - %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 - %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - - -define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 -; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP2]], i32 [[TMP12]] -; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 -; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP2]], i32 [[TMP14]] -; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 -; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP2]], i32 [[TMP16]] -; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 -; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 [[TMP18]] -; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 -; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP2]], i32 [[TMP20]] -; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 -; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP2]], i32 [[TMP22]] -; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 -; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP2]], i32 [[TMP24]] -; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 -; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 -; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 -; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 -; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 -; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 -; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 -; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 -; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 -; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 -; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 -; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 -; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 -; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 [[TMP1]], i32 14 -; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP2]], i32 [[TMP26]] -; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14 -; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 [[TMP1]], i32 15 -; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP2]], i32 [[TMP28]] -; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 -; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP31:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i128 [[TMP31]], 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP6]], i32 [[TMP4]] -; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP16]], zeroinitializer -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP18:%.*]] = or <16 x i8> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i128 [[TMP34]], 0 -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP10]], i32 [[TMP8]] -; CHECK-NEXT: [[_MSPROP19:%.*]] = or <16 x i8> [[_MSPROP18]], [[_MSPROP15]] -; CHECK-NEXT: [[TMP37:%.*]] = bitcast <16 x i8> [[_MSPROP15]] to i128 -; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i128 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP36]] -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) -; CHECK-NEXT: [[_MSPROP20:%.*]] = shufflevector <16 x i8> [[_MSPROP17]], <16 x i8> [[_MSPROP19]], <16 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i8> [[_MSPROP19]] to i128 -; CHECK-NEXT: [[TMP41:%.*]] = icmp ne i128 [[TMP40]], 0 -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 [[TMP33]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP20]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP42]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 - %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 - %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 - %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 - %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 - %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 - %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 - %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 - %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 - %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 - %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 - %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 - %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 - %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 - %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14 - %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - - -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - - -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - - - -define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2 -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], zeroinitializer -; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 [[TMP6]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], zeroinitializer -; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> ) -; CHECK-NEXT: [[_MSPROP4:%.*]] = shufflevector <16 x i8> [[_MSPROP1]], <16 x i8> [[_MSPROP3]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[_MSPROP3]] to i128 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP11]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> -; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[S]] -; - %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) - %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) - %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> - ret <16 x i8> %s -} - -declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone - -define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbx1_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 12: -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP13]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] -; CHECK: 14: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 15: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) - ret <8 x i8> %out -} - -define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbx1_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i128 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP6]], i32 [[TMP9]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP12]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) - ret <16 x i8> %out -} - -define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbx2_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] -; CHECK: 16: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 17: -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to i64 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP18]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] -; CHECK: 19: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 20: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) - ret <8 x i8> %out -} - -define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbx2_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP6]], i32 [[TMP11]] -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP8]], i32 [[TMP14]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP17]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) - ret <16 x i8> %out -} - -define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbx3_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 56) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP11]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 13: -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP14]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] -; CHECK: 15: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 16: -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP17]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] -; CHECK: 18: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 19: -; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP20]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] -; CHECK: 21: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 22: -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP9]] to i64 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP23]], 0 -; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] -; CHECK: 24: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP10]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 25: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) - ret <8 x i8> %out -} - -define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbx3_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i128 [[TMP14]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP6]], i32 [[TMP13]] -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i128 [[TMP17]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP8]], i32 [[TMP16]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP9]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i128 [[TMP20]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP10]], i32 [[TMP19]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP22]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) - ret <16 x i8> %out -} - -define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) sanitize_memory { -; CHECK-LABEL: define <8 x i8> @tbx4_8b -; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 56) to ptr), align 4 -; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 72) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP13]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] -; CHECK: 14: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 15: -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] -; CHECK: 17: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP4]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 18: -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP19]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]] -; CHECK: 20: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] -; CHECK: 23: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP8]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 24: -; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP25]], 0 -; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] -; CHECK: 26: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP10]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 27: -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP28]], 0 -; CHECK-NEXT: br i1 [[_MSCMP5]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] -; CHECK: 29: -; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 [[TMP12]]) #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 30: -; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]]) -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <8 x i8> [[OUT]] -; - %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) - ret <8 x i8> %out -} - -define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) sanitize_memory { -; CHECK-LABEL: define <16 x i8> @tbx4_16b -; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 48) to ptr), align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 64) to ptr), align 4 -; CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 80) to ptr), align 4 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 -; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i128 [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP4]], i32 [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i128 [[TMP16]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP6]], i32 [[TMP15]] -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP1]], [[TMP7]] -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP7]] to i128 -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i128 [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[TMP18]] -; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[TMP9]] -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP9]] to i128 -; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i128 [[TMP22]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP10]], i32 [[TMP21]] -; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[_MSPROP3]], [[TMP11]] -; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i8> [[TMP11]] to i128 -; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i128 [[TMP25]], 0 -; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP12]], i32 [[TMP24]] -; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: store i32 [[TMP27]], ptr @__msan_retval_origin_tls, align 4 -; CHECK-NEXT: ret <16 x i8> [[OUT]] -; - %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) - ret <16 x i8> %out -} - -declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone -declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone -declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone