// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:   -emit-cir -target-feature +neon %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:   -emit-llvm -target-feature +neon %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// This test file is similar to, but not the same as,
// clang/test/CodeGen/aarch64-neon-vget.c. The difference is that this file
// tests only the vset intrinsics; the vget intrinsics belong in a separate
// test file named aarch64-neon-vget.c.
// Also, for each integer type we test only signed or unsigned, not both,
// because integer types of the same size go through the same intrinsic.
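// For example, the vset_lane_u8 checks below match the signed CIR element
// type !s8i, the same type vset_lane_s8 would produce, and both lower to the
// same insertelement on <8 x i8> in LLVM IR, so one signedness per element
// size gives full coverage.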

// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>

uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CIR-LABEL: test_vset_lane_u8
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 8>

// LLVM: define dso_local <8 x i8> @test_vset_lane_u8(i8 [[A:%.*]], <8 x i8> [[B:%.*]])
// LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1
// LLVM: [[B_ADR:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store i8 [[A]], ptr [[A_ADR]], align 1
// LLVM: store <8 x i8> [[B]], ptr [[B_ADR]], align 8
// LLVM: [[TMP_A0:%.*]] = load i8, ptr [[A_ADR]], align 1
// LLVM: store i8 [[TMP_A0]], ptr [[S0:%.*]], align 1
// LLVM: [[TMP_B0:%.*]] = load <8 x i8>, ptr [[B_ADR]], align 8
// LLVM: store <8 x i8> [[TMP_B0]], ptr [[S1:%.*]], align 8
// LLVM: [[INTRN_ARG0:%.*]] = load i8, ptr [[S0]], align 1
// LLVM: [[INTRN_ARG1:%.*]] = load <8 x i8>, ptr [[S1]], align 8
// LLVM: [[INTRN_RES:%.*]] = insertelement <8 x i8> [[INTRN_ARG1]], i8 [[INTRN_ARG0]], i32 7
// LLVM: ret <8 x i8> {{%.*}}

uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CIR-LABEL: test_vset_lane_u16
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 4>

// LLVM: define dso_local <4 x i16> @test_vset_lane_u16(i16 [[A:%.*]], <4 x i16> [[B:%.*]])
// LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2
// LLVM: [[B_ADR:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store i16 [[A]], ptr [[A_ADR]], align 2
// LLVM: store <4 x i16> [[B]], ptr [[B_ADR]], align 8
// LLVM: [[TMP_A0:%.*]] = load i16, ptr [[A_ADR]], align 2
// LLVM: store i16 [[TMP_A0]], ptr [[S0:%.*]], align 2
// LLVM: [[TMP_B0:%.*]] = load <4 x i16>, ptr [[B_ADR]], align 8
// LLVM: store <4 x i16> [[TMP_B0]], ptr [[S1:%.*]], align 8
// LLVM: [[INTRN_ARG0:%.*]] = load i16, ptr [[S0]], align 2
// LLVM: [[INTRN_ARG1:%.*]] = load <4 x i16>, ptr [[S1]], align 8
// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x i16> [[INTRN_ARG1]], i16 [[INTRN_ARG0]], i32 3
// LLVM: ret <4 x i16> {{%.*}}

uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CIR-LABEL: test_vset_lane_u32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 2>

// LLVM: define dso_local <2 x i32> @test_vset_lane_u32(i32 [[A:%.*]], <2 x i32> [[B:%.*]])
// LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4
// LLVM: [[B_ADR:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store i32 [[A]], ptr [[A_ADR]], align 4
// LLVM: store <2 x i32> [[B]], ptr [[B_ADR]], align 8
// LLVM: [[TMP_A0:%.*]] = load i32, ptr [[A_ADR]], align 4
// LLVM: store i32 [[TMP_A0]], ptr [[S0:%.*]], align 4
// LLVM: [[TMP_B0:%.*]] = load <2 x i32>, ptr [[B_ADR]], align 8
// LLVM: store <2 x i32> [[TMP_B0]], ptr [[S1:%.*]], align 8
// LLVM: [[INTRN_ARG0:%.*]] = load i32, ptr [[S0]], align 4
// LLVM: [[INTRN_ARG1:%.*]] = load <2 x i32>, ptr [[S1]], align 8
// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x i32> [[INTRN_ARG1]], i32 [[INTRN_ARG0]], i32 1
// LLVM: ret <2 x i32> {{%.*}}
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}
| 89 | + |
| 90 | +// CIR-LABEL: test_vset_lane_u64 |
| 91 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i |
| 92 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 1> |
| 93 | + |
| 94 | +// LLVM: define dso_local <1 x i64> @test_vset_lane_u64(i64 [[A:%.*]], <1 x i64> [[B:%.*]]) |
| 95 | +// LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8 |
| 96 | +// LLVM: [[B_ADR:%.*]] = alloca <1 x i64>, i64 1, align 8 |
| 97 | +// LLVM: store i64 [[A]], ptr [[A_ADR]], align 8 |
| 98 | +// LLVM: store <1 x i64> [[B]], ptr [[B_ADR]], align 8 |
| 99 | +// LLVM: [[TMP_A0:%.*]] = load i64, ptr [[A_ADR]], align 8 |
| 100 | +// LLVM: store i64 [[TMP_A0]], ptr [[S0:%.*]], align 8 |
| 101 | +// LLVM: [[TMP_B0:%.*]] = load <1 x i64>, ptr [[B_ADR]], align 8 |
| 102 | +// LLVM: store <1 x i64> [[TMP_B0]], ptr [[S1:%.*]], align 8 |
| 103 | +// LLVM: [[INTRN_ARG0:%.*]] = load i64, ptr [[S0]], align 8 |
| 104 | +// LLVM: [[INTRN_ARG1:%.*]] = load <1 x i64>, ptr [[S1]], align 8 |
| 105 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <1 x i64> [[INTRN_ARG1]], i64 [[INTRN_ARG0]], i32 0 |
| 106 | +// LLVM: ret <1 x i64> {{%.*}} |
| 107 | + |
| 108 | +float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { |
| 109 | + return vset_lane_f32(a, b, 1); |
| 110 | +} |
| 111 | + |
| 112 | +// CIR-LABEL: test_vset_lane_f32 |
| 113 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i |
| 114 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2> |
| 115 | + |
| 116 | +// LLVM: define dso_local <2 x float> @test_vset_lane_f32(float [[A:%.*]], <2 x float> [[B:%.*]]) |
| 117 | +// LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4 |
| 118 | +// LLVM: [[B_ADR:%.*]] = alloca <2 x float>, i64 1, align 8 |
| 119 | +// LLVM: store float [[A]], ptr [[A_ADR]], align 4 |
| 120 | +// LLVM: store <2 x float> [[B]], ptr [[B_ADR]], align 8 |
| 121 | +// LLVM: [[TMP_A0:%.*]] = load float, ptr [[A_ADR]], align 4 |
| 122 | +// LLVM: store float [[TMP_A0]], ptr [[S0:%.*]], align 4 |
| 123 | +// LLVM: [[TMP_B0:%.*]] = load <2 x float>, ptr [[B_ADR]], align 8 |
| 124 | +// LLVM: store <2 x float> [[TMP_B0]], ptr [[S1:%.*]], align 8 |
| 125 | +// LLVM: [[INTRN_ARG0:%.*]] = load float, ptr [[S0]], align 4 |
| 126 | +// LLVM: [[INTRN_ARG1:%.*]] = load <2 x float>, ptr [[S1]], align 8 |
| 127 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x float> [[INTRN_ARG1]], float [[INTRN_ARG0]], i32 1 |
| 128 | +// LLVM: ret <2 x float> {{%.*}} |
| 129 | + |
| 130 | +uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { |
| 131 | + return vsetq_lane_u8(a, b, 15); |
| 132 | +} |
| 133 | + |
| 134 | +// CIR-LABEL: test_vsetq_lane_u8 |
| 135 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i |
| 136 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 16> |
| 137 | + |
| 138 | +// LLVM: define dso_local <16 x i8> @test_vsetq_lane_u8(i8 [[A:%.*]], <16 x i8> [[B:%.*]]) |
| 139 | +// LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1 |
| 140 | +// LLVM: [[B_ADR:%.*]] = alloca <16 x i8>, i64 1, align 16 |
| 141 | +// LLVM: store i8 [[A]], ptr [[A_ADR]], align 1 |
| 142 | +// LLVM: store <16 x i8> [[B]], ptr [[B_ADR]], align 16 |
| 143 | +// LLVM: [[TMP_A0:%.*]] = load i8, ptr [[A_ADR]], align 1 |
| 144 | +// LLVM: store i8 [[TMP_A0]], ptr [[S0:%.*]], align 1 |
| 145 | +// LLVM: [[TMP_B0:%.*]] = load <16 x i8>, ptr [[B_ADR]], align 16 |
| 146 | +// LLVM: store <16 x i8> [[TMP_B0]], ptr [[S1:%.*]], align 16 |
| 147 | +// LLVM: [[INTRN_ARG0:%.*]] = load i8, ptr [[S0]], align 1 |
| 148 | +// LLVM: [[INTRN_ARG1:%.*]] = load <16 x i8>, ptr [[S1]], align 16 |
| 149 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <16 x i8> [[INTRN_ARG1]], i8 [[INTRN_ARG0]], i32 15 |
| 150 | +// LLVM: ret <16 x i8> {{%.*}} |
| 151 | + |
| 152 | +uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { |
| 153 | + return vsetq_lane_u16(a, b, 7); |
| 154 | +} |
| 155 | + |
| 156 | +// CIR-LABEL: test_vsetq_lane_u16 |
| 157 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i |
| 158 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 8> |
| 159 | + |
| 160 | +// LLVM: define dso_local <8 x i16> @test_vsetq_lane_u16(i16 [[A:%.*]], <8 x i16> [[B:%.*]]) |
| 161 | +// LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2 |
| 162 | +// LLVM: [[B_ADR:%.*]] = alloca <8 x i16>, i64 1, align 16 |
| 163 | +// LLVM: store i16 [[A]], ptr [[A_ADR]], align 2 |
| 164 | +// LLVM: store <8 x i16> [[B]], ptr [[B_ADR]], align 16 |
| 165 | +// LLVM: [[TMP_A0:%.*]] = load i16, ptr [[A_ADR]], align 2 |
| 166 | +// LLVM: store i16 [[TMP_A0]], ptr [[S0:%.*]], align 2 |
| 167 | +// LLVM: [[TMP_B0:%.*]] = load <8 x i16>, ptr [[B_ADR]], align 16 |
| 168 | +// LLVM: store <8 x i16> [[TMP_B0]], ptr [[S1:%.*]], align 16 |
| 169 | +// LLVM: [[INTRN_ARG0:%.*]] = load i16, ptr [[S0]], align 2 |
| 170 | +// LLVM: [[INTRN_ARG1:%.*]] = load <8 x i16>, ptr [[S1]], align 16 |
| 171 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <8 x i16> [[INTRN_ARG1]], i16 [[INTRN_ARG0]], i32 7 |
| 172 | +// LLVM: ret <8 x i16> {{%.*}} |
| 173 | + |
| 174 | +uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { |
| 175 | + return vsetq_lane_u32(a, b, 3); |
| 176 | +} |
| 177 | + |
| 178 | +// CIR-LABEL: test_vsetq_lane_u32 |
| 179 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i |
| 180 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 4> |
| 181 | + |
| 182 | +// LLVM: define dso_local <4 x i32> @test_vsetq_lane_u32(i32 [[A:%.*]], <4 x i32> [[B:%.*]]) |
| 183 | +// LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4 |
| 184 | +// LLVM: [[B_ADR:%.*]] = alloca <4 x i32>, i64 1, align 16 |
| 185 | +// LLVM: store i32 [[A]], ptr [[A_ADR]], align 4 |
| 186 | +// LLVM: store <4 x i32> [[B]], ptr [[B_ADR]], align 16 |
| 187 | +// LLVM: [[TMP_A0:%.*]] = load i32, ptr [[A_ADR]], align 4 |
| 188 | +// LLVM: store i32 [[TMP_A0]], ptr [[S0:%.*]], align 4 |
| 189 | +// LLVM: [[TMP_B0:%.*]] = load <4 x i32>, ptr [[B_ADR]], align 16 |
| 190 | +// LLVM: store <4 x i32> [[TMP_B0]], ptr [[S1:%.*]], align 16 |
| 191 | +// LLVM: [[INTRN_ARG0:%.*]] = load i32, ptr [[S0]], align 4 |
| 192 | +// LLVM: [[INTRN_ARG1:%.*]] = load <4 x i32>, ptr [[S1]], align 16 |
| 193 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x i32> [[INTRN_ARG1]], i32 [[INTRN_ARG0]], i32 3 |
| 194 | +// LLVM: ret <4 x i32> {{%.*}} |
| 195 | + |
| 196 | +int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { |
| 197 | + return vsetq_lane_s64(a, b, 1); |
| 198 | +} |
| 199 | + |
| 200 | +// CIR-LABEL: test_vsetq_lane_s64 |
| 201 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i |
| 202 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 2> |
| 203 | + |
| 204 | +// LLVM: define dso_local <2 x i64> @test_vsetq_lane_s64(i64 [[A:%.*]], <2 x i64> [[B:%.*]]) |
| 205 | +// LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8 |
| 206 | +// LLVM: [[B_ADR:%.*]] = alloca <2 x i64>, i64 1, align 16 |
| 207 | +// LLVM: store i64 [[A]], ptr [[A_ADR]], align 8 |
| 208 | +// LLVM: store <2 x i64> [[B]], ptr [[B_ADR]], align 16 |
| 209 | +// LLVM: [[TMP_A0:%.*]] = load i64, ptr [[A_ADR]], align 8 |
| 210 | +// LLVM: store i64 [[TMP_A0]], ptr [[S0:%.*]], align 8 |
| 211 | +// LLVM: [[TMP_B0:%.*]] = load <2 x i64>, ptr [[B_ADR]], align 16 |
| 212 | +// LLVM: store <2 x i64> [[TMP_B0]], ptr [[S1:%.*]], align 16 |
| 213 | +// LLVM: [[INTRN_ARG0:%.*]] = load i64, ptr [[S0]], align 8 |
| 214 | +// LLVM: [[INTRN_ARG1:%.*]] = load <2 x i64>, ptr [[S1]], align 16 |
| 215 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x i64> [[INTRN_ARG1]], i64 [[INTRN_ARG0]], i32 1 |
| 216 | +// LLVM: ret <2 x i64> {{%.*}} |
| 217 | + |
| 218 | +float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { |
| 219 | + return vsetq_lane_f32(a, b, 3); |
| 220 | +} |
| 221 | + |
| 222 | +// CIR-LABEL: test_vsetq_lane_f32 |
| 223 | +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i |
| 224 | +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4> |
| 225 | + |
| 226 | +// LLVM: define dso_local <4 x float> @test_vsetq_lane_f32(float [[A:%.*]], <4 x float> [[B:%.*]]) |
| 227 | +// LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4 |
| 228 | +// LLVM: [[B_ADR:%.*]] = alloca <4 x float>, i64 1, align 16 |
| 229 | +// LLVM: store float [[A]], ptr [[A_ADR]], align 4 |
| 230 | +// LLVM: store <4 x float> [[B]], ptr [[B_ADR]], align 16 |
| 231 | +// LLVM: [[TMP_A0:%.*]] = load float, ptr [[A_ADR]], align 4 |
| 232 | +// LLVM: store float [[TMP_A0]], ptr [[S0:%.*]], align 4 |
| 233 | +// LLVM: [[TMP_B0:%.*]] = load <4 x float>, ptr [[B_ADR]], align 16 |
| 234 | +// LLVM: store <4 x float> [[TMP_B0]], ptr [[S1:%.*]], align 16 |
| 235 | +// LLVM: [[INTRN_ARG0:%.*]] = load float, ptr [[S0]], align 4 |
| 236 | +// LLVM: [[INTRN_ARG1:%.*]] = load <4 x float>, ptr [[S1]], align 16 |
| 237 | +// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x float> [[INTRN_ARG1]], float [[INTRN_ARG0]], i32 3 |
| 238 | +// LLVM: ret <4 x float> {{%.*}} |