From 36dacb0dafa98b5a3be44d98cca332c35b86a064 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Mon, 23 Oct 2023 11:01:44 +0000 Subject: [PATCH 1/4] Pre-commit tests for ExpandMemcmp tail expansion --- llvm/test/CodeGen/AArch64/memcmp.ll | 3024 +++++++++++++++++ .../Transforms/ExpandMemCmp/AArch64/memcmp.ll | 921 +++++ 2 files changed, 3945 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/memcmp.ll create mode 100644 llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll new file mode 100644 index 0000000000000..dd4431108c440 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/memcmp.ll @@ -0,0 +1,3024 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +@.str = private constant [513 x i8] c"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901\00", align 1 + +declare dso_local i32 @memcmp(ptr, ptr, i64) + +define i32 @length0(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + ret i32 %m + } + +define i1 @length0_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length0_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length0_lt(ptr %X, ptr %Y) 
nounwind { +; CHECK-LABEL: length0_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 0) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length2(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: sub w0, w8, w9, lsr #16 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + ret i32 %m +} + +define i32 @length2_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: mov w8, #-12594 // =0xffffcece +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: add w0, w8, w9, lsr #16 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + ret i32 %m +} + +define i1 @length2_gt_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_gt_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: mov w8, #-12594 // =0xffffcece +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: add w8, w8, w9, lsr #16 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length2_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev 
w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: sub w8, w8, w9, lsr #16 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_gt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: sub w8, w8, w9, lsr #16 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length2_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: mov w9, #12849 // =0x3231 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length2_eq_nobuiltin_attr: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #2 // =0x2 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 2) nounwind nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length3(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length3: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: lsr w9, w9, #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB11_2 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldrb w8, [x0, #2] +; CHECK-NEXT: ldrb w9, [x1, #2] +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_2: // %res_block +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length3_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #2] +; CHECK-NEXT: ldrb w11, [x1, #2] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr 
w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length4_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_gt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length4_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + +define i1 @length4_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length4_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #12849 // =0x3231 +; CHECK-NEXT: movk w9, #13363, lsl #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 4) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length5(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length5: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB18_2 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldrb w8, [x0, #4] +; CHECK-NEXT: ldrb w9, [x1, #4] +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret 
+; CHECK-NEXT: .LBB18_2: // %res_block +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length5_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #4] +; CHECK-NEXT: ldrb w11, [x1, #4] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i1 @length5_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length5_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB20_2 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldrb w8, [x0, #4] +; CHECK-NEXT: ldrb w9, [x1, #4] +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB20_2: // %res_block +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length6(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length6: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB21_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldrh w8, [x0, #4] +; CHECK-NEXT: ldrh w9, [x1, #4] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: lsr w9, w9, #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB21_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB21_3: // %res_block +; 
CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind + ret i32 %m +} + +define i32 @length7(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length7: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB22_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldur w8, [x0, #3] +; CHECK-NEXT: ldur w9, [x1, #3] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB22_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB22_3: // %res_block +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + ret i32 %m +} + +define i1 @length7_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length7_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB23_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldur w8, [x0, #3] +; CHECK-NEXT: ldur w9, [x1, #3] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB23_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB23_3: // %res_block +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i1 @length7_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length7_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: ldur w10, [x0, #3] +; 
CHECK-NEXT: ldur w11, [x1, #3] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 7) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length8: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length8_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 8) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length8_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length8_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x9, #12592 // =0x3130 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: movk x9, #13106, lsl #16 +; CHECK-NEXT: movk x9, #13620, lsl #32 +; CHECK-NEXT: movk x9, #14134, lsl #48 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 8) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length9(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length9: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB28_2 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldrb w8, [x0, #8] +; CHECK-NEXT: ldrb w9, [x1, #8] +; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB28_2: // %res_block +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = 
tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind + ret i32 %m +} + +define i1 @length9_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length9_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #8] +; CHECK-NEXT: ldrb w11, [x1, #8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length10(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length10: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB30_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldrh w8, [x0, #8] +; CHECK-NEXT: ldrh w9, [x1, #8] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: lsr w9, w9, #16 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB30_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB30_3: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind + ret i32 %m +} + +define i1 @length10_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length10_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldrh w10, [x0, #8] +; CHECK-NEXT: ldrh w11, [x1, #8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length11(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length11: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: 
cmp x8, x9 +; CHECK-NEXT: b.ne .LBB32_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldur x8, [x0, #3] +; CHECK-NEXT: ldur x9, [x1, #3] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB32_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB32_3: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind + ret i32 %m +} + +define i1 @length11_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length11_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldur x10, [x0, #3] +; CHECK-NEXT: ldur x11, [x1, #3] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 11) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length12_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length12_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldr w10, [x0, #8] +; CHECK-NEXT: ldr w11, [x1, #8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length12: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB35_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr w8, [x0, #8] +; CHECK-NEXT: ldr w9, [x1, #8] +; CHECK-NEXT: rev w8, w8 +; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB35_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB35_3: // 
%res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind + ret i32 %m +} + +define i1 @length13_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length13_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldur x10, [x0, #5] +; CHECK-NEXT: ldur x11, [x1, #5] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 13) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length14_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length14_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldur x10, [x0, #6] +; CHECK-NEXT: ldur x11, [x1, #6] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 14) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length15(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length15: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB38_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldur x8, [x0, #7] +; CHECK-NEXT: ldur x9, [x1, #7] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB38_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB38_3: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + ret i32 %m +} + +define i1 @length15_lt(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length15_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; 
CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB39_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldur x8, [x0, #7] +; CHECK-NEXT: ldur x9, [x1, #7] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB39_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB39_3: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp slt i32 %m, 0 + ret i1 %c +} + +define i32 @length15_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length15_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #14136 // =0x3738 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: movk x8, #13622, lsl #16 +; CHECK-NEXT: movk x8, #13108, lsl #32 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: movk x8, #12594, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.ne .LBB40_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: mov x8, #13365 // =0x3435 +; CHECK-NEXT: ldur x9, [x0, #7] +; CHECK-NEXT: movk x8, #12851, lsl #16 +; CHECK-NEXT: movk x8, #12337, lsl #32 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: movk x8, #14393, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.ne .LBB40_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB40_3: // %res_block +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind + ret i32 %m +} + +define i1 @length15_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length15_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldur x10, [x0, #7] +; CHECK-NEXT: ldur x11, [x1, #7] +; 
CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 15) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i1 @length15_gt_const(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length15_gt_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #14136 // =0x3738 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: movk x8, #13622, lsl #16 +; CHECK-NEXT: movk x8, #13108, lsl #32 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: movk x8, #12594, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.ne .LBB42_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: mov x8, #13365 // =0x3435 +; CHECK-NEXT: ldur x9, [x0, #7] +; CHECK-NEXT: movk x8, #12851, lsl #16 +; CHECK-NEXT: movk x8, #12337, lsl #32 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: movk x8, #14393, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.ne .LBB42_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB42_4 +; CHECK-NEXT: .LBB42_3: // %res_block +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB42_4: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 15) nounwind + %c = icmp sgt i32 %m, 0 + ret i1 %c +} + + +define i32 @length16(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB43_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB43_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB43_3: // %res_block +; CHECK-NEXT: cmp x8, x9 +; 
CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length16_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length16_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB45_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB45_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB45_3: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length16_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB46_3 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB46_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB46_4 +; CHECK-NEXT: .LBB46_3: // %res_block +; 
CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB46_4: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length16_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length16_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x8, #14648 // =0x3938 +; CHECK-NEXT: movk x8, #12592, lsl #16 +; CHECK-NEXT: movk x8, #13106, lsl #32 +; CHECK-NEXT: movk x8, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x8, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 16) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + + +define i32 @length24(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length24: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB48_4 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB48_4 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB48_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB48_4: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 24) nounwind + ret i32 %m +} + +define i1 
@length24_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length24_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldr x12, [x0, #16] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldr x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x12, x13, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length24_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB50_4 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB50_4 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB50_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB50_4: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length24_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB51_4 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: 
b.ne .LBB51_4 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB51_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB51_5 +; CHECK-NEXT: .LBB51_4: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB51_5: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 24) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length24_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: ldr x11, [x0, #16] +; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x8, #14648 // =0x3938 +; CHECK-NEXT: movk x8, #12592, lsl #16 +; CHECK-NEXT: movk x8, #13106, lsl #32 +; CHECK-NEXT: movk x8, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x8, #0, eq +; CHECK-NEXT: mov x8, #14134 // =0x3736 +; CHECK-NEXT: movk x8, #14648, lsl #16 +; CHECK-NEXT: movk x8, #12592, lsl #32 +; CHECK-NEXT: movk x8, #13106, lsl #48 +; CHECK-NEXT: ccmp x11, x8, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length31(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length31: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB53_5 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp 
x8, x9 +; CHECK-NEXT: b.ne .LBB53_5 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB53_5 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldur x8, [x0, #23] +; CHECK-NEXT: ldur x9, [x1, #23] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB53_5 +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB53_5: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 31) nounwind + ret i32 %m +} + +define i1 @length31_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length31_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldr x12, [x0, #16] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldr x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldur x8, [x0, #23] +; CHECK-NEXT: ldur x9, [x1, #23] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x12, x13, #0, eq +; CHECK-NEXT: ccmp x8, x9, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length31_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB55_5 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB55_5 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; 
CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB55_5 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldur x8, [x0, #23] +; CHECK-NEXT: ldur x9, [x1, #23] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB55_5 +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB55_5: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length31_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB56_5 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB56_5 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB56_5 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldur x8, [x0, #23] +; CHECK-NEXT: ldur x9, [x1, #23] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB56_5 +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB56_6 +; CHECK-NEXT: .LBB56_5: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB56_6: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 
@length31_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; CHECK-LABEL: length31_eq_prefer128: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldr x12, [x0, #16] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldr x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldur x8, [x0, #23] +; CHECK-NEXT: ldur x9, [x1, #23] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x12, x13, #0, eq +; CHECK-NEXT: ccmp x8, x9, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 31) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length31_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length31_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: ldr x11, [x0, #16] +; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x8, #14648 // =0x3938 +; CHECK-NEXT: ldur x9, [x0, #23] +; CHECK-NEXT: movk x8, #12592, lsl #16 +; CHECK-NEXT: movk x8, #13106, lsl #32 +; CHECK-NEXT: movk x8, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x8, #0, eq +; CHECK-NEXT: mov x8, #14134 // =0x3736 +; CHECK-NEXT: movk x8, #14648, lsl #16 +; CHECK-NEXT: movk x8, #12592, lsl #32 +; CHECK-NEXT: movk x8, #13106, lsl #48 +; CHECK-NEXT: ccmp x11, x8, #0, eq +; CHECK-NEXT: mov x8, #13363 // =0x3433 +; CHECK-NEXT: movk x8, #13877, lsl #16 +; CHECK-NEXT: movk x8, #14391, lsl #32 +; CHECK-NEXT: movk x8, #12345, lsl #48 +; CHECK-NEXT: ccmp x9, x8, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 31) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length32(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 
+; CHECK-NEXT: b.ne .LBB59_5 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB59_5 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB59_5 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB59_5 +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB59_5: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 32) nounwind + ret i32 %m +} + + +define i1 @length32_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length32_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldp x12, x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldp x8, x9, [x0, #16] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x8, x12, #0, eq +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length32_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB61_5 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB61_5 +; CHECK-NEXT: // %bb.2: 
// %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB61_5 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB61_5 +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB61_5: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length32_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB62_5 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB62_5 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB62_5 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB62_5 +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB62_6 +; CHECK-NEXT: .LBB62_5: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB62_6: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call 
= tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; CHECK-LABEL: length32_eq_prefer128: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldp x12, x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldp x8, x9, [x0, #16] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x8, x12, #0, eq +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 32) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length32_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length32_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x8, #14648 // =0x3938 +; CHECK-NEXT: movk x8, #12592, lsl #16 +; CHECK-NEXT: ldp x9, x11, [x0, #16] +; CHECK-NEXT: movk x8, #13106, lsl #32 +; CHECK-NEXT: movk x8, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x8, #0, eq +; CHECK-NEXT: mov x8, #14134 // =0x3736 +; CHECK-NEXT: movk x8, #14648, lsl #16 +; CHECK-NEXT: movk x8, #12592, lsl #32 +; CHECK-NEXT: movk x8, #13106, lsl #48 +; CHECK-NEXT: ccmp x9, x8, #0, eq +; CHECK-NEXT: mov x8, #13620 // =0x3534 +; CHECK-NEXT: movk x8, #14134, lsl #16 +; CHECK-NEXT: movk x8, #14648, lsl #32 +; CHECK-NEXT: movk x8, #12592, lsl #48 +; CHECK-NEXT: ccmp x11, x8, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 32) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length48(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length48: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 
+; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB65_7 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB65_7 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB65_7 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB65_7 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB65_7 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB65_7 +; CHECK-NEXT: // %bb.6: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB65_7: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 48) nounwind + ret i32 %m +} + +define i1 @length48_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length48_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldp x12, x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldp x8, x9, [x0, #16] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x8, x12, #0, eq +; CHECK-NEXT: ldp x8, x11, [x0, #32] +; CHECK-NEXT: ldp x10, x12, [x1, #32] +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: ccmp x8, x10, #0, eq +; CHECK-NEXT: ccmp x11, x12, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail 
call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length48_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB67_7 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB67_7 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB67_7 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB67_7 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB67_7 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB67_7 +; CHECK-NEXT: // %bb.6: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB67_7: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length48_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; 
CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB68_7 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB68_7 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB68_7 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB68_7 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB68_7 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB68_7 +; CHECK-NEXT: // %bb.6: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB68_8 +; CHECK-NEXT: .LBB68_7: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB68_8: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_prefer128(ptr %x, ptr %y) nounwind "prefer-vector-width"="128" { +; CHECK-LABEL: length48_eq_prefer128: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldp x12, x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldp x8, x9, [x0, #16] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x8, x12, #0, eq +; CHECK-NEXT: ldp x8, x11, [x0, #32] +; CHECK-NEXT: ldp x10, x12, 
[x1, #32] +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: ccmp x8, x10, #0, eq +; CHECK-NEXT: ccmp x11, x12, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 48) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length48_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length48_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: ldp x11, x12, [x0, #16] +; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x9, #14648 // =0x3938 +; CHECK-NEXT: movk x9, #12592, lsl #16 +; CHECK-NEXT: movk x9, #13106, lsl #32 +; CHECK-NEXT: movk x9, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x9, #0, eq +; CHECK-NEXT: mov x9, #14134 // =0x3736 +; CHECK-NEXT: movk x9, #14648, lsl #16 +; CHECK-NEXT: movk x9, #12592, lsl #32 +; CHECK-NEXT: movk x9, #13106, lsl #48 +; CHECK-NEXT: ccmp x11, x9, #0, eq +; CHECK-NEXT: mov x9, #13620 // =0x3534 +; CHECK-NEXT: movk x9, #14134, lsl #16 +; CHECK-NEXT: ldp x10, x11, [x0, #32] +; CHECK-NEXT: movk x9, #14648, lsl #32 +; CHECK-NEXT: movk x9, #12592, lsl #48 +; CHECK-NEXT: ccmp x12, x9, #0, eq +; CHECK-NEXT: mov x9, #13106 // =0x3332 +; CHECK-NEXT: movk x9, #13620, lsl #16 +; CHECK-NEXT: movk x9, #14134, lsl #32 +; CHECK-NEXT: movk x9, #14648, lsl #48 +; CHECK-NEXT: ccmp x10, x9, #0, eq +; CHECK-NEXT: ccmp x11, x8, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 48) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length63(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length63: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: 
ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.6: // %loadbb6 +; CHECK-NEXT: ldr x8, [x0, #48] +; CHECK-NEXT: ldr x9, [x1, #48] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.7: // %loadbb7 +; CHECK-NEXT: ldur x8, [x0, #55] +; CHECK-NEXT: ldur x9, [x1, #55] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB71_9 +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB71_9: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 63) nounwind + ret i32 %m +} + +define i1 @length63_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length63_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldp x12, x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldp x8, x9, [x0, #16] +; CHECK-NEXT: ccmp x10, x11, #0, eq 
+; CHECK-NEXT: ccmp x8, x12, #0, eq +; CHECK-NEXT: ldp x8, x11, [x0, #32] +; CHECK-NEXT: ldp x10, x12, [x1, #32] +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: ldr x9, [x0, #48] +; CHECK-NEXT: ldr x13, [x1, #48] +; CHECK-NEXT: ccmp x8, x10, #0, eq +; CHECK-NEXT: ldur x8, [x0, #55] +; CHECK-NEXT: ldur x10, [x1, #55] +; CHECK-NEXT: ccmp x11, x12, #0, eq +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: ccmp x8, x10, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length63_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.6: // %loadbb6 +; CHECK-NEXT: ldr x8, [x0, #48] +; CHECK-NEXT: ldr x9, [x1, 
#48] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.7: // %loadbb7 +; CHECK-NEXT: ldur x8, [x0, #55] +; CHECK-NEXT: ldur x9, [x1, #55] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB73_9 +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB73_9: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length63_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: 
b.ne .LBB74_9 +; CHECK-NEXT: // %bb.6: // %loadbb6 +; CHECK-NEXT: ldr x8, [x0, #48] +; CHECK-NEXT: ldr x9, [x1, #48] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.7: // %loadbb7 +; CHECK-NEXT: ldur x8, [x0, #55] +; CHECK-NEXT: ldur x9, [x1, #55] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB74_9 +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB74_10 +; CHECK-NEXT: .LBB74_9: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB74_10: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 63) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length63_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length63_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: ldp x11, x12, [x0, #16] +; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x9, #14648 // =0x3938 +; CHECK-NEXT: movk x9, #12592, lsl #16 +; CHECK-NEXT: movk x9, #13106, lsl #32 +; CHECK-NEXT: movk x9, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x9, #0, eq +; CHECK-NEXT: mov x10, #14134 // =0x3736 +; CHECK-NEXT: movk x10, #14648, lsl #16 +; CHECK-NEXT: movk x10, #12592, lsl #32 +; CHECK-NEXT: movk x10, #13106, lsl #48 +; CHECK-NEXT: ccmp x11, x10, #0, eq +; CHECK-NEXT: mov x10, #13620 // =0x3534 +; CHECK-NEXT: movk x10, #14134, lsl #16 +; CHECK-NEXT: ldp x11, x13, [x0, #32] +; CHECK-NEXT: movk x10, #14648, lsl #32 +; CHECK-NEXT: movk x10, #12592, lsl #48 +; CHECK-NEXT: ccmp x12, x10, #0, eq +; CHECK-NEXT: mov x10, #13106 // =0x3332 +; CHECK-NEXT: ldr x12, [x0, #48] +; CHECK-NEXT: movk x10, #13620, lsl #16 
+; CHECK-NEXT: movk x10, #14134, lsl #32 +; CHECK-NEXT: movk x10, #14648, lsl #48 +; CHECK-NEXT: ccmp x11, x10, #0, eq +; CHECK-NEXT: ldur x10, [x0, #55] +; CHECK-NEXT: ccmp x13, x8, #0, eq +; CHECK-NEXT: mov x8, #13877 // =0x3635 +; CHECK-NEXT: movk x8, #14391, lsl #16 +; CHECK-NEXT: ccmp x12, x9, #0, eq +; CHECK-NEXT: movk x8, #12345, lsl #32 +; CHECK-NEXT: movk x8, #12849, lsl #48 +; CHECK-NEXT: ccmp x10, x8, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 63) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length64(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.6: // %loadbb6 +; CHECK-NEXT: ldr x8, [x0, #48] +; CHECK-NEXT: ldr x9, [x1, #48] +; CHECK-NEXT: 
rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.7: // %loadbb7 +; CHECK-NEXT: ldr x8, [x0, #56] +; CHECK-NEXT: ldr x9, [x1, #56] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB76_9 +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB76_9: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length64_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: ldp x12, x13, [x1, #16] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ldp x8, x9, [x0, #16] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ccmp x8, x12, #0, eq +; CHECK-NEXT: ldp x8, x11, [x0, #32] +; CHECK-NEXT: ldp x10, x12, [x1, #32] +; CHECK-NEXT: ccmp x9, x13, #0, eq +; CHECK-NEXT: ldp x9, x13, [x1, #48] +; CHECK-NEXT: ccmp x8, x10, #0, eq +; CHECK-NEXT: ldp x8, x10, [x0, #48] +; CHECK-NEXT: ccmp x11, x12, #0, eq +; CHECK-NEXT: ccmp x8, x9, #0, eq +; CHECK-NEXT: ccmp x10, x13, #0, eq +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length64_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; 
CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.6: // %loadbb6 +; CHECK-NEXT: ldr x8, [x0, #48] +; CHECK-NEXT: ldr x9, [x1, #48] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.7: // %loadbb7 +; CHECK-NEXT: ldr x8, [x0, #56] +; CHECK-NEXT: ldr x9, [x1, #56] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB78_9 +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: lsr w0, wzr, #31 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB78_9: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length64_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.1: // %loadbb1 +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x1, #8] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: 
cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.2: // %loadbb2 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x9, [x1, #16] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.3: // %loadbb3 +; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x9, [x1, #24] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.4: // %loadbb4 +; CHECK-NEXT: ldr x8, [x0, #32] +; CHECK-NEXT: ldr x9, [x1, #32] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.5: // %loadbb5 +; CHECK-NEXT: ldr x8, [x0, #40] +; CHECK-NEXT: ldr x9, [x1, #40] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.6: // %loadbb6 +; CHECK-NEXT: ldr x8, [x0, #48] +; CHECK-NEXT: ldr x9, [x1, #48] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.7: // %loadbb7 +; CHECK-NEXT: ldr x8, [x0, #56] +; CHECK-NEXT: ldr x9, [x1, #56] +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB79_9 +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB79_10 +; CHECK-NEXT: .LBB79_9: // %res_block +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cneg w8, w8, hs +; CHECK-NEXT: .LBB79_10: // %endblock +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 64) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length64_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length64_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #12592 // =0x3130 +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: movk x8, #13106, lsl #16 +; CHECK-NEXT: ldp x11, x12, [x0, #16] 
+; CHECK-NEXT: movk x8, #13620, lsl #32 +; CHECK-NEXT: ldp x13, x14, [x0, #32] +; CHECK-NEXT: movk x8, #14134, lsl #48 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov x9, #14648 // =0x3938 +; CHECK-NEXT: movk x9, #12592, lsl #16 +; CHECK-NEXT: movk x9, #13106, lsl #32 +; CHECK-NEXT: movk x9, #13620, lsl #48 +; CHECK-NEXT: ccmp x10, x9, #0, eq +; CHECK-NEXT: mov x10, #14134 // =0x3736 +; CHECK-NEXT: movk x10, #14648, lsl #16 +; CHECK-NEXT: movk x10, #12592, lsl #32 +; CHECK-NEXT: movk x10, #13106, lsl #48 +; CHECK-NEXT: ccmp x11, x10, #0, eq +; CHECK-NEXT: mov x11, #13620 // =0x3534 +; CHECK-NEXT: movk x11, #14134, lsl #16 +; CHECK-NEXT: movk x11, #14648, lsl #32 +; CHECK-NEXT: movk x11, #12592, lsl #48 +; CHECK-NEXT: ccmp x12, x11, #0, eq +; CHECK-NEXT: mov x11, #13106 // =0x3332 +; CHECK-NEXT: movk x11, #13620, lsl #16 +; CHECK-NEXT: movk x11, #14134, lsl #32 +; CHECK-NEXT: movk x11, #14648, lsl #48 +; CHECK-NEXT: ccmp x13, x11, #0, eq +; CHECK-NEXT: ldp x11, x12, [x0, #48] +; CHECK-NEXT: ccmp x14, x8, #0, eq +; CHECK-NEXT: ccmp x11, x9, #0, eq +; CHECK-NEXT: ccmp x12, x10, #0, eq +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 64) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length96(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length96: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #96 // =0x60 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 96) nounwind + ret i32 %m +} + +define i1 @length96_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length96_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #96 // =0x60 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length96_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #96 // =0x60 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length96_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #96 // =0x60 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 96) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length96_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length96_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #96 // =0x60 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 96) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length127(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length127: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #127 // =0x7f +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 127) nounwind + ret i32 %m +} + +define i1 @length127_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length127_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #127 // =0x7f +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length127_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #127 // =0x7f +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length127_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #127 // =0x7f +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 127) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length127_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length127_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #127 // =0x7f +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 127) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length128(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length128: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #128 // =0x80 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 128) nounwind + ret i32 %m +} + +define i1 @length128_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length128_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #128 // =0x80 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length128_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #128 // =0x80 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length128_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #128 // =0x80 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 128) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length128_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length128_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #128 // =0x80 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 128) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length192(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length192: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #192 // =0xc0 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 192) nounwind + ret i32 %m +} + +define i1 @length192_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length192_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #192 // =0xc0 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length192_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #192 // =0xc0 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length192_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #192 // =0xc0 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 192) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length192_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length192_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #192 // =0xc0 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 192) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length255(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length255: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #255 // =0xff +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 255) nounwind + ret i32 %m +} + +define i1 @length255_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length255_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #255 // =0xff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length255_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #255 // =0xff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length255_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #255 // =0xff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 255) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length255_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length255_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #255 // =0xff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 255) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length256(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length256: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 256) nounwind + ret i32 %m +} + +define i1 @length256_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length256_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length256_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length256_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 256) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length256_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length256_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #256 // =0x100 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 256) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length384(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length384: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 384) nounwind + ret i32 %m +} + +define i1 @length384_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length384_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length384_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length384_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 384) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length384_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length384_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #384 // =0x180 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 384) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length511(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length511: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 511) nounwind + ret i32 %m +} + +define i1 @length511_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length511_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length511_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length511_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 511) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length511_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length511_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #511 // =0x1ff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 511) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @length512(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: length512: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 512) nounwind + ret i32 %m +} + +define i1 @length512_eq(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length512_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp ne i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_lt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length512_lt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: lsr w0, w0, #31 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp slt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_gt(ptr %x, ptr %y) nounwind { +; CHECK-LABEL: length512_gt: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 512) nounwind + %cmp = icmp sgt i32 %call, 0 + ret i1 %cmp +} + +define i1 @length512_eq_const(ptr %X) nounwind { +; CHECK-LABEL: length512_eq_const: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: adrp x1, .L.str +; CHECK-NEXT: add x1, x1, :lo12:.L.str +; CHECK-NEXT: mov w2, #512 // =0x200 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr @.str, i64 512) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @huge_length(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: huge_length: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + ret i32 %m +} + +define i1 @huge_length_eq(ptr %X, ptr %Y) nounwind { +; CHECK-LABEL: huge_length_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov x2, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9223372036854775807) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} + +define i32 @nonconst_length(ptr %X, ptr %Y, i64 %size) nounwind { +; CHECK-LABEL: nonconst_length: +; CHECK: // %bb.0: +; CHECK-NEXT: b memcmp + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + ret i32 %m +} + +define i1 @nonconst_length_eq(ptr %X, ptr %Y, i64 %size) nounwind { +; CHECK-LABEL: nonconst_length_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 %size) nounwind + %c = icmp eq i32 %m, 0 + ret i1 %c +} diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll new file mode 100644 index 0000000000000..e5b78fef7a064 --- /dev/null +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll @@ -0,0 +1,921 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s + +declare i32 @memcmp(ptr nocapture, ptr nocapture, i64) + +define i32 @cmp2(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp2( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 
@llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) + ret i32 %call +} + +define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonly align 2 %y) { +; CHECK-LABEL: define i32 @cmp2_align2( +; CHECK-SAME: ptr nocapture readonly align 2 [[X:%.*]], ptr nocapture readonly align 2 [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) + ret i32 %call +} + +define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp3( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) + ret i32 %call +} + +define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp4( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + ret i32 %call +} + +define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp5( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) + ret i32 %call +} + +define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp6( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; 
CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) + ret i32 %call +} + +define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp7( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; 
CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 7) + ret i32 %call +} + +define i32 @cmp8(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp8( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP9]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8) + ret i32 %call +} + +define i32 @cmp9(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp9( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP5:%.*]], [[TMP6:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 
1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9) + ret i32 %call +} + +define i32 @cmp10(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp10( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 
[[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 10) + ret i32 %call +} + +define i32 @cmp11(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp11( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) 
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 11) + ret i32 %call +} + +define i32 @cmp12(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp12( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = 
getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP15]] = zext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 12) + ret i32 %call +} + +define i32 @cmp13(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp13( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; CHECK-NEXT: [[TMP9:%.*]] = 
getelementptr i8, ptr [[Y]], i64 5 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 13) + ret i32 %call +} + +define i32 @cmp14(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp14( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = 
call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 14) + ret i32 %call +} + +define i32 @cmp15(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp15( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br 
i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 [[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 15) + ret i32 %call +} + +define i32 @cmp16(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp16( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ] +; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP12]] = call i64 @llvm.bswap.i64(i64 [[TMP10]]) +; CHECK-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] +; CHECK-NEXT: ret i32 
[[PHI_RES]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) + ret i32 %call +} + +define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq2( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 2) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq3( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; 
CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq4(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq4( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 4) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq5(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq5( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 
[[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq6(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq6( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq6_align4(ptr nocapture readonly align 4 %x, ptr nocapture readonly align 4 %y) { +; CHECK-LABEL: define i32 @cmp_eq6_align4( +; CHECK-SAME: ptr nocapture readonly align 4 [[X:%.*]], ptr nocapture readonly align 4 [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load 
i32, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq7(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq7( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: 
endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 7) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq8(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq8( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 8) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq9(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq9( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP6]], [[TMP7]] +; 
CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 9) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq10(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq10( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i16 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 10) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq11(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq11( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture 
readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 11) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq12(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq12( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 12) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq13(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq13( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 5 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 5 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 13) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq14(ptr nocapture readonly %x, ptr nocapture readonly 
%y) { +; CHECK-LABEL: define i32 @cmp_eq14( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 6 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 6 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 14) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq15(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq15( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr 
i8, ptr [[Y]], i64 7 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 15) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) { +; CHECK-LABEL: define i32 @cmp_eq16( +; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { +; CHECK-NEXT: br label [[LOADBB:%.*]] +; CHECK: res_block: +; CHECK-NEXT: br label [[ENDBLOCK:%.*]] +; CHECK: loadbb: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; CHECK: loadbb1: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[X]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[Y]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; CHECK: endblock: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] +; + %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 16) + %cmp = icmp eq i32 %call, 0 + %conv = zext i1 %cmp to 
i32 + ret i32 %conv +} From c2df35fd51af2794afc303637c4cd61a670caa01 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Mon, 23 Oct 2023 10:43:50 +0000 Subject: [PATCH 2/4] [CodeGen] Improve ExpandMemCmp for more efficient non-register aligned sizes handling * Enhanced the logic of ExpandMemCmp pass to merge contiguous subsequences in LoadSequence, based on sizes allowed in `AllowedTailExpansions`. * This enhancement seeks to minimize the number of basic blocks and produce optimized code when using memcmp with non-register aligned sizes. * Enable this feature for AArch64 with memcmp sizes modulo 8 equal to 3, 5, and 6. --- .../llvm/Analysis/TargetTransformInfo.h | 11 ++ llvm/lib/CodeGen/ExpandMemCmp.cpp | 74 +++++++++--- .../AArch64/AArch64TargetTransformInfo.cpp | 1 + llvm/test/CodeGen/AArch64/memcmp.ll | 96 ++++++---------- .../Transforms/ExpandMemCmp/AArch64/memcmp.ll | 108 ++++++------------ 5 files changed, 140 insertions(+), 150 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 5234ef8788d9e..3ec80d99b392b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -907,6 +907,17 @@ class TargetTransformInfo { // be done with two 4-byte compares instead of 4+2+1-byte compares. This // requires all loads in LoadSizes to be doable in an unaligned way. bool AllowOverlappingLoads = false; + + // Sometimes, the amount of data that needs to be compared is smaller than + // the standard register size, but it cannot be loaded with just one load + // instruction. For example, if the size of the memory comparison is 6 + // bytes, we can handle it more efficiently by loading all 6 bytes in a + // single block and generating an 8-byte number, instead of generating two + // separate blocks with conditional jumps for 4 and 2 byte loads. This + // approach simplifies the process and produces the comparison result as + // normal. 
This array lists the allowed sizes of memcmp tails that can be + merged into one block + SmallVector<unsigned, 4> AllowedTailExpansions; }; MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 911ebd41afc5b..d9c2c6f5f39ba 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -117,8 +117,8 @@ class MemCmpExpansion { Value *Lhs = nullptr; Value *Rhs = nullptr; }; - LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType, - unsigned OffsetBytes); + LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *BSwapSizeType, + Type *CmpSizeType, unsigned OffsetBytes); static LoadEntryVector computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, @@ -255,6 +255,31 @@ MemCmpExpansion::MemCmpExpansion( } } assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); + // This part of code attempts to optimize the LoadSequence by merging allowed + // subsequences into single loads of allowed sizes from + // `AllowedTailExpansions`. If it is for zero comparison or if no allowed tail + // expansions are specified, we exit early. 
+ if (IsUsedForZeroCmp || !Options.AllowedTailExpansions.size()) + return; + + while (LoadSequence.size() >= 2) { + auto Last = LoadSequence[LoadSequence.size() - 1]; + auto PreLast = LoadSequence[LoadSequence.size() - 2]; + + // Exit the loop if the two sequences are not contiguous + if (PreLast.Offset + PreLast.LoadSize != Last.Offset) + break; + + auto LoadSize = Last.LoadSize + PreLast.LoadSize; + if (find(Options.AllowedTailExpansions, LoadSize) == + Options.AllowedTailExpansions.end()) + break; + + // Remove the last two sequences and replace with the combined sequence + LoadSequence.pop_back(); + LoadSequence.pop_back(); + LoadSequence.emplace_back(PreLast.Offset, LoadSize); + } } unsigned MemCmpExpansion::getNumBlocks() { @@ -279,6 +304,7 @@ void MemCmpExpansion::createResultBlock() { MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, bool NeedsBSwap, + Type *BSwapSizeType, Type *CmpSizeType, unsigned OffsetBytes) { // Get the memory source at offset `OffsetBytes`. @@ -307,16 +333,22 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, if (!Rhs) Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign); + // Zero extend if Byte Swap intrinsic has different type + if (NeedsBSwap && LoadSizeType != BSwapSizeType) { + Lhs = Builder.CreateZExt(Lhs, BSwapSizeType); + Rhs = Builder.CreateZExt(Rhs, BSwapSizeType); + } + // Swap bytes if required. if (NeedsBSwap) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); + Function *Bswap = Intrinsic::getDeclaration( + CI->getModule(), Intrinsic::bswap, BSwapSizeType); Lhs = Builder.CreateCall(Bswap, Lhs); Rhs = Builder.CreateCall(Bswap, Rhs); } // Zero extend if required. 
- if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) { + if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) { Lhs = Builder.CreateZExt(Lhs, CmpSizeType); Rhs = Builder.CreateZExt(Rhs, CmpSizeType); } @@ -333,7 +365,7 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, Builder.SetInsertPoint(BB); const LoadPair Loads = getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false, - Type::getInt32Ty(CI->getContext()), OffsetBytes); + nullptr, Type::getInt32Ty(CI->getContext()), OffsetBytes); Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); PhiRes->addIncoming(Diff, BB); @@ -385,11 +417,12 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, IntegerType *const MaxLoadType = NumLoads == 1 ? nullptr : IntegerType::get(CI->getContext(), MaxLoadSize * 8); + for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; const LoadPair Loads = getLoadPair( IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), - /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset); + /*NeedsBSwap=*/false, nullptr, MaxLoadType, CurLoadEntry.Offset); if (NumLoads != 1) { // If we have multiple loads per block, we need to generate a composite @@ -475,14 +508,18 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { Type *LoadSizeType = IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Type *BSwapSizeType = IntegerType::get( + CI->getContext(), PowerOf2Ceil(CurLoadEntry.LoadSize * 8)); + Type *MaxLoadType = IntegerType::get( + CI->getContext(), + std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8); assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); const LoadPair Loads = - getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType, - CurLoadEntry.Offset); + 
getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), + BSwapSizeType, MaxLoadType, CurLoadEntry.Offset); // Add the loaded values to the phi nodes for calculating memcmp result only // if result is not used in a zero equality. @@ -588,19 +625,26 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Type *BSwapSizeType = + IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8)); + Type *MaxLoadType = + IntegerType::get(CI->getContext(), + std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8); + bool NeedsBSwap = DL.isLittleEndian() && Size != 1; // The i8 and i16 cases don't need compares. We zext the loaded values and // subtract them to get the suitable negative, zero, or positive i32 result. if (Size < 4) { - const LoadPair Loads = - getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(), - /*Offset*/ 0); + const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, BSwapSizeType, + Builder.getInt32Ty(), + /*Offset*/ 0); return Builder.CreateSub(Loads.Lhs, Loads.Rhs); } - const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType, - /*Offset*/ 0); + const LoadPair Loads = + getLoadPair(LoadSizeType, NeedsBSwap, BSwapSizeType, MaxLoadType, + /*Offset*/ 0); // The result of memcmp is negative, zero, or positive, so produce that by // subtracting 2 extended compare bits: sub (ugt, ult). 
// If a target prefers to use selects to get -1/0/1, they should be able diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 6bbd7009e2378..776619c90393c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2994,6 +2994,7 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { // they may wake up the FP unit, which raises the power consumption. Perhaps // they could be used with no holds barred (-O3). Options.LoadSizes = {8, 4, 2, 1}; + Options.AllowedTailExpansions = {3, 5, 6}; return Options; } diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll index dd4431108c440..b38acbae10915 100644 --- a/llvm/test/CodeGen/AArch64/memcmp.ll +++ b/llvm/test/CodeGen/AArch64/memcmp.ll @@ -152,22 +152,15 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind { define i32 @length3(ptr %X, ptr %Y) nounwind { ; CHECK-LABEL: length3: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: ldrb w8, [x0, #2] +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: ldrb w10, [x1, #2] +; CHECK-NEXT: ldrh w11, [x1] +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: orr w9, w11, w10, lsl #16 ; CHECK-NEXT: rev w8, w8 ; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB11_2 -; CHECK-NEXT: // %bb.1: // %loadbb1 -; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrb w9, [x1, #2] ; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_2: // %res_block -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind ret i32 %m @@ -272,20 +265,18 @@ define i1 @length4_eq_const(ptr %X) nounwind { define i32 @length5(ptr %X, ptr %Y) nounwind { ; CHECK-LABEL: length5: ; 
CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB18_2 -; CHECK-NEXT: // %bb.1: // %loadbb1 ; CHECK-NEXT: ldrb w8, [x0, #4] -; CHECK-NEXT: ldrb w9, [x1, #4] +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldrb w10, [x1, #4] +; CHECK-NEXT: ldr w11, [x1] +; CHECK-NEXT: orr x8, x9, x8, lsl #32 +; CHECK-NEXT: orr x9, x11, x10, lsl #32 +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo ; CHECK-NEXT: sub w0, w8, w9 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB18_2: // %res_block -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind ret i32 %m @@ -310,22 +301,19 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { define i1 @length5_lt(ptr %X, ptr %Y) nounwind { ; CHECK-LABEL: length5_lt: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB20_2 -; CHECK-NEXT: // %bb.1: // %loadbb1 ; CHECK-NEXT: ldrb w8, [x0, #4] -; CHECK-NEXT: ldrb w9, [x1, #4] +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldrb w10, [x1, #4] +; CHECK-NEXT: ldr w11, [x1] +; CHECK-NEXT: orr x8, x9, x8, lsl #32 +; CHECK-NEXT: orr x9, x11, x10, lsl #32 +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo ; CHECK-NEXT: sub w8, w8, w9 ; CHECK-NEXT: lsr w0, w8, #31 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB20_2: // %res_block -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w8, w8, hs -; CHECK-NEXT: lsr w0, w8, #31 ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind %c = icmp slt i32 %m, 0 @@ -335,28 +323,18 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind { define i32 @length6(ptr %X, ptr %Y) 
nounwind { ; CHECK-LABEL: length6: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB21_3 -; CHECK-NEXT: // %bb.1: // %loadbb1 ; CHECK-NEXT: ldrh w8, [x0, #4] -; CHECK-NEXT: ldrh w9, [x1, #4] -; CHECK-NEXT: rev w8, w8 -; CHECK-NEXT: rev w9, w9 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne .LBB21_3 -; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB21_3: // %res_block -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldrh w10, [x1, #4] +; CHECK-NEXT: ldr w11, [x1] +; CHECK-NEXT: orr x8, x9, x8, lsl #32 +; CHECK-NEXT: orr x9, x11, x10, lsl #32 +; CHECK-NEXT: rev x8, x8 +; CHECK-NEXT: rev x9, x9 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo +; CHECK-NEXT: sub w0, w8, w9 ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 6) nounwind ret i32 %m diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll index e5b78fef7a064..95fb883f3cdd5 100644 --- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll @@ -38,30 +38,14 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; CHECK-LABEL: define i32 @cmp3( ; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { -; CHECK-NEXT: br label [[LOADBB:%.*]] -; CHECK: res_block: -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP5:%.*]], [[TMP6:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 -; CHECK-NEXT: br label [[ENDBLOCK:%.*]] -; CHECK: loadbb: -; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[X]], align 1 
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y]], align 1 -; CHECK-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; CHECK-NEXT: [[TMP6]] = call i16 @llvm.bswap.i16(i16 [[TMP4]]) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i16 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; CHECK: loadbb1: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 2 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 -; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 -; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: br label [[ENDBLOCK]] -; CHECK: endblock: -; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PHI_RES]] +; CHECK-NEXT: [[TMP1:%.*]] = load i24, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i24, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i24 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = zext i24 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[TMP7]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) ret i32 %call @@ -88,30 +72,18 @@ define i32 @cmp4(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; CHECK-LABEL: define i32 @cmp5( ; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { -; CHECK-NEXT: br label [[LOADBB:%.*]] -; CHECK: res_block: -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP5:%.*]], [[TMP6:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 -; CHECK-NEXT: br label 
[[ENDBLOCK:%.*]] -; CHECK: loadbb: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 -; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; CHECK: loadbb1: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP10]] to i32 -; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 -; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: br label [[ENDBLOCK]] -; CHECK: endblock: -; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP14]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PHI_RES]] +; CHECK-NEXT: [[TMP1:%.*]] = load i40, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i40, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i40 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i40 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 5) ret i32 %call @@ -120,34 +92,18 @@ define i32 @cmp5(ptr nocapture readonly %x, ptr nocapture readonly %y) { define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; CHECK-LABEL: 
define i32 @cmp6( ; CHECK-SAME: ptr nocapture readonly [[X:%.*]], ptr nocapture readonly [[Y:%.*]]) { -; CHECK-NEXT: br label [[LOADBB:%.*]] -; CHECK: res_block: -; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ] -; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 -; CHECK-NEXT: br label [[ENDBLOCK:%.*]] -; CHECK: loadbb: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1 -; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] -; CHECK: loadbb1: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 4 -; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP10]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP11]]) -; CHECK-NEXT: [[TMP14]] = zext i16 [[TMP12]] to i32 -; CHECK-NEXT: [[TMP15]] = zext i16 [[TMP13]] to i32 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], [[TMP15]] -; CHECK-NEXT: br i1 [[TMP16]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; CHECK: endblock: -; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PHI_RES]] +; CHECK-NEXT: [[TMP1:%.*]] = load i48, ptr [[X]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i48, ptr [[Y]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i48 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i48 [[TMP2]] to i64 +; CHECK-NEXT: 
[[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 6) ret i32 %call From 27809e15a323149e89712fdbf63f39d377830f70 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Wed, 25 Oct 2023 18:00:37 +0000 Subject: [PATCH 3/4] Address the review comments --- llvm/lib/CodeGen/ExpandMemCmp.cpp | 105 +++++++++++++++++------------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index d9c2c6f5f39ba..3f948f734fcf7 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -117,7 +117,7 @@ class MemCmpExpansion { Value *Lhs = nullptr; Value *Rhs = nullptr; }; - LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *BSwapSizeType, + LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType, Type *CmpSizeType, unsigned OffsetBytes); static LoadEntryVector @@ -128,6 +128,11 @@ class MemCmpExpansion { unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte); + static void optimiseLoadSequence( + LoadEntryVector &LoadSequence, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + bool IsUsedForZeroCmp); + public: MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, @@ -210,6 +215,37 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size, return LoadSequence; } +void MemCmpExpansion::optimiseLoadSequence( + LoadEntryVector &LoadSequence, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + bool IsUsedForZeroCmp) { + // This part of code 
attempts to optimize the LoadSequence by merging allowed + // subsequences into single loads of allowed sizes from + // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero + // comparison or if no allowed tail expansions are specified, we exit early. + if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty()) + return; + + while (LoadSequence.size() >= 2) { + auto Last = LoadSequence[LoadSequence.size() - 1]; + auto PreLast = LoadSequence[LoadSequence.size() - 2]; + + // Exit the loop if the two sequences are not contiguous + if (PreLast.Offset + PreLast.LoadSize != Last.Offset) + break; + + auto LoadSize = Last.LoadSize + PreLast.LoadSize; + if (find(Options.AllowedTailExpansions, LoadSize) == + Options.AllowedTailExpansions.end()) + break; + + // Remove the last two sequences and replace with the combined sequence + LoadSequence.pop_back(); + LoadSequence.pop_back(); + LoadSequence.emplace_back(PreLast.Offset, LoadSize); + } +} + // Initialize the basic block structure required for expansion of memcmp call // with given maximum load size and memcmp size parameter. // This structure includes: @@ -255,31 +291,7 @@ MemCmpExpansion::MemCmpExpansion( } } assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant"); - // This part of code attempts to optimize the LoadSequence by merging allowed - // subsequences into single loads of allowed sizes from - // `AllowedTailExpansions`. If it is for zero comparison or if no allowed tail - // expansions are specified, we exit early. 
- if (IsUsedForZeroCmp || !Options.AllowedTailExpansions.size()) - return; - - while (LoadSequence.size() >= 2) { - auto Last = LoadSequence[LoadSequence.size() - 1]; - auto PreLast = LoadSequence[LoadSequence.size() - 2]; - - // Exit the loop if the two sequences are not contiguous - if (PreLast.Offset + PreLast.LoadSize != Last.Offset) - break; - - auto LoadSize = Last.LoadSize + PreLast.LoadSize; - if (find(Options.AllowedTailExpansions, LoadSize) == - Options.AllowedTailExpansions.end()) - break; - - // Remove the last two sequences and replace with the combined sequence - LoadSequence.pop_back(); - LoadSequence.pop_back(); - LoadSequence.emplace_back(PreLast.Offset, LoadSize); - } + optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp); } unsigned MemCmpExpansion::getNumBlocks() { @@ -303,7 +315,6 @@ void MemCmpExpansion::createResultBlock() { } MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, - bool NeedsBSwap, Type *BSwapSizeType, Type *CmpSizeType, unsigned OffsetBytes) { @@ -334,13 +345,13 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign); // Zero extend if Byte Swap intrinsic has different type - if (NeedsBSwap && LoadSizeType != BSwapSizeType) { + if (BSwapSizeType && LoadSizeType != BSwapSizeType) { Lhs = Builder.CreateZExt(Lhs, BSwapSizeType); Rhs = Builder.CreateZExt(Rhs, BSwapSizeType); } // Swap bytes if required. 
- if (NeedsBSwap) { + if (BSwapSizeType) { Function *Bswap = Intrinsic::getDeclaration( CI->getModule(), Intrinsic::bswap, BSwapSizeType); Lhs = Builder.CreateCall(Bswap, Lhs); @@ -364,8 +375,8 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, BasicBlock *BB = LoadCmpBlocks[BlockIndex]; Builder.SetInsertPoint(BB); const LoadPair Loads = - getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false, - nullptr, Type::getInt32Ty(CI->getContext()), OffsetBytes); + getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr, + Type::getInt32Ty(CI->getContext()), OffsetBytes); Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); PhiRes->addIncoming(Diff, BB); @@ -421,8 +432,8 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; const LoadPair Loads = getLoadPair( - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), - /*NeedsBSwap=*/false, nullptr, MaxLoadType, CurLoadEntry.Offset); + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr, + MaxLoadType, CurLoadEntry.Offset); if (NumLoads != 1) { // If we have multiple loads per block, we need to generate a composite @@ -508,8 +519,11 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { Type *LoadSizeType = IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Type *BSwapSizeType = IntegerType::get( - CI->getContext(), PowerOf2Ceil(CurLoadEntry.LoadSize * 8)); + Type *BSwapSizeType = + DL.isLittleEndian() + ? 
IntegerType::get(CI->getContext(), + PowerOf2Ceil(CurLoadEntry.LoadSize * 8)) + : nullptr; Type *MaxLoadType = IntegerType::get( CI->getContext(), std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8); @@ -517,9 +531,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - const LoadPair Loads = - getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), - BSwapSizeType, MaxLoadType, CurLoadEntry.Offset); + const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType, + CurLoadEntry.Offset); // Add the loaded values to the phi nodes for calculating memcmp result only // if result is not used in a zero equality. @@ -624,27 +637,25 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { /// A memcmp expansion that only has one block of load and compare can bypass /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { + bool NeedsBSwap = DL.isLittleEndian() && Size != 1; Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); Type *BSwapSizeType = - IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8)); + NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8)) + : nullptr; Type *MaxLoadType = IntegerType::get(CI->getContext(), std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8); - bool NeedsBSwap = DL.isLittleEndian() && Size != 1; - // The i8 and i16 cases don't need compares. We zext the loaded values and // subtract them to get the suitable negative, zero, or positive i32 result. 
if (Size < 4) { - const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, BSwapSizeType, - Builder.getInt32Ty(), - /*Offset*/ 0); + const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, + Builder.getInt32Ty(), /*Offset*/ 0); return Builder.CreateSub(Loads.Lhs, Loads.Rhs); } - const LoadPair Loads = - getLoadPair(LoadSizeType, NeedsBSwap, BSwapSizeType, MaxLoadType, - /*Offset*/ 0); + const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType, + /*Offset*/ 0); // The result of memcmp is negative, zero, or positive, so produce that by // subtracting 2 extended compare bits: sub (ugt, ult). // If a target prefers to use selects to get -1/0/1, they should be able From 8aeb63e241df1d3df53c2eba9b24fa9d0c9e2ac2 Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Fri, 27 Oct 2023 15:01:36 +0000 Subject: [PATCH 4/4] Fix bug when memcmp had size equal three --- llvm/lib/CodeGen/ExpandMemCmp.cpp | 2 +- llvm/test/CodeGen/AArch64/memcmp.ll | 3 +++ llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll | 8 ++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 3f948f734fcf7..28e258be226a6 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -648,7 +648,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { // The i8 and i16 cases don't need compares. We zext the loaded values and // subtract them to get the suitable negative, zero, or positive i32 result. 
- if (Size < 4) { + if (Size == 1 || Size == 2) { const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, Builder.getInt32Ty(), /*Offset*/ 0); return Builder.CreateSub(Loads.Lhs, Loads.Rhs); diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll index b38acbae10915..d13a416a28761 100644 --- a/llvm/test/CodeGen/AArch64/memcmp.ll +++ b/llvm/test/CodeGen/AArch64/memcmp.ll @@ -160,6 +160,9 @@ define i32 @length3(ptr %X, ptr %Y) nounwind { ; CHECK-NEXT: orr w9, w11, w10, lsl #16 ; CHECK-NEXT: rev w8, w8 ; CHECK-NEXT: rev w9, w9 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w8, hi +; CHECK-NEXT: cset w9, lo ; CHECK-NEXT: sub w0, w8, w9 ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll index 95fb883f3cdd5..54f8c7006bb51 100644 --- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll +++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll @@ -44,8 +44,12 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) { ; CHECK-NEXT: [[TMP4:%.*]] = zext i24 [[TMP2]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]]) -; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: ret i32 [[TMP11]] ; %call = tail call i32 @memcmp(ptr %x, ptr %y, i64 3) ret i32 %call