Skip to content

[BOLT][tests] Fix jrcxz instruction test #95861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
maksfb merged 1 commit into llvm:main from gh-fix-jrcxz-test
Jun 17, 2024
Merged

Conversation

maksfb
Copy link
Contributor

@maksfb maksfb commented Jun 17, 2024

Rewrite the test case intended to check that BOLT does not separate a jrcxz instruction from its destination by more than its one-byte signed offset can reach.

Rewrite the test case intended to check that BOLT does not separate
jrcxz instruction from its destination by more than a one-byte offset.
@maksfb maksfb added the BOLT label Jun 17, 2024
@maksfb maksfb requested a review from urnathan June 17, 2024 23:03
@llvmbot
Copy link
Member

llvmbot commented Jun 17, 2024

@llvm/pr-subscribers-bolt

Author: Maksim Panchenko (maksfb)

Changes

Rewrite the test case intended to check that BOLT does not separate a jrcxz instruction from its destination by more than its one-byte signed offset can reach.


Full diff: https://github.com/llvm/llvm-project/pull/95861.diff

1 File Affected:

  • (modified) bolt/test/X86/bug-reorder-bb-jrcxz.s (+21-628)
diff --git a/bolt/test/X86/bug-reorder-bb-jrcxz.s b/bolt/test/X86/bug-reorder-bb-jrcxz.s
index d5ac3548909e3..8a11ac4da4d67 100644
--- a/bolt/test/X86/bug-reorder-bb-jrcxz.s
+++ b/bolt/test/X86/bug-reorder-bb-jrcxz.s
@@ -1,640 +1,33 @@
-## Test performs a BB reordering with unsupported
-## instruction jrcxz. Reordering works correctly with the
-## follow options: None, Normal or Reverse. Other strategies
-## are completed with Assertion `isIntN(Size * 8 + 1, Value).
-## The cause is the distance between BB where one contains
-## jrcxz instruction.
-## Example: OpenSSL
-## https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
+## Check that BOLT handles code with jrcxz instruction that has a one-byte
+## signed offset restriction. If we try to separate jrcxz instruction from its
+## destination, e.g. by placing it in a different code fragment, then the link
+## step will fail.
 
 # REQUIRES: system-linux
 
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
-# RUN:   %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # RUN: link_fdata %s %t.o %t.fdata
-# RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
 
-# RUN:  llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
-# RUN:    --reorder-blocks=ext-tsp --reorder-functions=hfsort \
-# RUN:    --split-functions --split-all-cold --split-eh --dyno-stats \
-# RUN:    --print-finalized 2>&1 | FileCheck %s
+## Disable relocation mode to leave main fragment in its original location.
 
-# CHECK-NOT: value of -2105 is too large for field of 1 byte.
+# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
+# RUN:   --split-functions --relocs=0
 
-  .text
-  .section .text.startup,"ax",@progbits
-  .p2align 5,,31
-  .globl main
-  .type main, @function
+	.text
+	.globl main
+	.type	main,@function
 main:
-  jmp bn_sqrx8x_internal
-
-.globl bn_sqrx8x_internal
-.hidden bn_sqrx8x_internal
-.type bn_sqrx8x_internal,@function
-.align 32
-bn_sqrx8x_internal:
-__bn_sqrx8x_internal:
-# FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
-# FDATA: 1 bn_sqrx8x_internal 13 1  bn_sqrx8x_internal 40 0 60972
-# FDATA: 1 bn_sqrx8x_internal 5f 1  bn_sqrx8x_internal 2c 0 60972
-# FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
-# FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
-# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
-# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
-# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
-# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
-# FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
-# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
-# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
-# FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
-# FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
-# FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
-# FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
-# FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
-# FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
-# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
-# FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
-# FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
-# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
-# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
-# FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
-# FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
-# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
-# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
-# FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
+# FDATA: 0 [unknown] 0 1 main 0 0 1
+# FDATA: 1 main 0 1 main #.hot# 0 1
 .cfi_startproc
-  leaq 48+8(%rsp),%rdi
-  leaq (%rsi,%r9,1),%rbp
-  movq %r9,0+8(%rsp)
-  movq %rbp,8+8(%rsp)
-  jmp .Lsqr8x_zero_start
-
-.align 32
-.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
-.Lsqrx8x_zero:
-.byte 0x3e
-  movdqa %xmm0,0(%rdi)
-  movdqa %xmm0,16(%rdi)
-  movdqa %xmm0,32(%rdi)
-  movdqa %xmm0,48(%rdi)
-.Lsqr8x_zero_start:
-  movdqa %xmm0,64(%rdi)
-  movdqa %xmm0,80(%rdi)
-  movdqa %xmm0,96(%rdi)
-  movdqa %xmm0,112(%rdi)
-  leaq 128(%rdi),%rdi
-  subq $64,%r9
-  jnz .Lsqrx8x_zero
-
-  movq 0(%rsi),%rdx
-
-  xorq %r10,%r10
-  xorq %r11,%r11
-  xorq %r12,%r12
-  xorq %r13,%r13
-  xorq %r14,%r14
-  xorq %r15,%r15
-  leaq 48+8(%rsp),%rdi
-  xorq %rbp,%rbp
-  jmp .Lsqrx8x_outer_loop
-
-.align 32
-.Lsqrx8x_outer_loop:
-  mulxq 8(%rsi),%r8,%rax
-  adcxq %r9,%r8
-  adoxq %rax,%r10
-  mulxq 16(%rsi),%r9,%rax
-  adcxq %r10,%r9
-  adoxq %rax,%r11
-.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
-  adcxq %r11,%r10
-  adoxq %rax,%r12
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
-  adcxq %r12,%r11
-  adoxq %rax,%r13
-  mulxq 40(%rsi),%r12,%rax
-  adcxq %r13,%r12
-  adoxq %rax,%r14
-  mulxq 48(%rsi),%r13,%rax
-  adcxq %r14,%r13
-  adoxq %r15,%rax
-  mulxq 56(%rsi),%r14,%r15
-  movq 8(%rsi),%rdx
-  adcxq %rax,%r14
-  adoxq %rbp,%r15
-  adcq 64(%rdi),%r15
-  movq %r8,8(%rdi)
-  movq %r9,16(%rdi)
-  sbbq %rcx,%rcx
-  xorq %rbp,%rbp
-
-  mulxq 16(%rsi),%r8,%rbx
-  mulxq 24(%rsi),%r9,%rax
-  adcxq %r10,%r8
-  adoxq %rbx,%r9
-  mulxq 32(%rsi),%r10,%rbx
-  adcxq %r11,%r9
-  adoxq %rax,%r10
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
-  adcxq %r12,%r10
-  adoxq %rbx,%r11
-.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
-  adcxq %r13,%r11
-  adoxq %r14,%r12
-.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
-  movq 16(%rsi),%rdx
-  adcxq %rax,%r12
-  adoxq %rbx,%r13
-  adcxq %r15,%r13
-  adoxq %rbp,%r14
-  adcxq %rbp,%r14
-
-  movq %r8,24(%rdi)
-  movq %r9,32(%rdi)
-
-  mulxq 24(%rsi),%r8,%rbx
-  mulxq 32(%rsi),%r9,%rax
-  adcxq %r10,%r8
-  adoxq %rbx,%r9
-  mulxq 40(%rsi),%r10,%rbx
-  adcxq %r11,%r9
-  adoxq %rax,%r10
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
-  adcxq %r12,%r10
-  adoxq %r13,%r11
-.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
-.byte 0x3e
-  movq 24(%rsi),%rdx
-  adcxq %rbx,%r11
-  adoxq %rax,%r12
-  adcxq %r14,%r12
-  movq %r8,40(%rdi)
-  movq %r9,48(%rdi)
-  mulxq 32(%rsi),%r8,%rax
-  adoxq %rbp,%r13
-  adcxq %rbp,%r13
-
-  mulxq 40(%rsi),%r9,%rbx
-  adcxq %r10,%r8
-  adoxq %rax,%r9
-  mulxq 48(%rsi),%r10,%rax
-  adcxq %r11,%r9
-  adoxq %r12,%r10
-  mulxq 56(%rsi),%r11,%r12
-  movq 32(%rsi),%rdx
-  movq 40(%rsi),%r14
-  adcxq %rbx,%r10
-  adoxq %rax,%r11
-  movq 48(%rsi),%r15
-  adcxq %r13,%r11
-  adoxq %rbp,%r12
-  adcxq %rbp,%r12
-
-  movq %r8,56(%rdi)
-  movq %r9,64(%rdi)
-
-  mulxq %r14,%r9,%rax
-  movq 56(%rsi),%r8
-  adcxq %r10,%r9
-  mulxq %r15,%r10,%rbx
-  adoxq %rax,%r10
-  adcxq %r11,%r10
-  mulxq %r8,%r11,%rax
-  movq %r14,%rdx
-  adoxq %rbx,%r11
-  adcxq %r12,%r11
-
-  adcxq %rbp,%rax
-
-  mulxq %r15,%r14,%rbx
-  mulxq %r8,%r12,%r13
-  movq %r15,%rdx
-  leaq 64(%rsi),%rsi
-  adcxq %r14,%r11
-  adoxq %rbx,%r12
-  adcxq %rax,%r12
-  adoxq %rbp,%r13
-
-.byte 0x67,0x67
-  mulxq %r8,%r8,%r14
-  adcxq %r8,%r13
-  adcxq %rbp,%r14
-
-  cmpq 8+8(%rsp),%rsi
-  je .Lsqrx8x_outer_break
-
-  negq %rcx
-  movq $-8,%rcx
-  movq %rbp,%r15
-  movq 64(%rdi),%r8
-  adcxq 72(%rdi),%r9
-  adcxq 80(%rdi),%r10
-  adcxq 88(%rdi),%r11
-  adcq 96(%rdi),%r12
-  adcq 104(%rdi),%r13
-  adcq 112(%rdi),%r14
-  adcq 120(%rdi),%r15
-  leaq (%rsi),%rbp
-  leaq 128(%rdi),%rdi
-  sbbq %rax,%rax
-
-  movq -64(%rsi),%rdx
-  movq %rax,16+8(%rsp)
-  movq %rdi,24+8(%rsp)
-
+  jrcxz .Lcold
+.hot:
+  ret
 
+.Lcold:
   xorl %eax,%eax
-  jmp .Lsqrx8x_loop
-
-.align 32
-.Lsqrx8x_loop:
-  movq %r8,%rbx
-  mulxq 0(%rbp),%rax,%r8
-  adcxq %rax,%rbx
-  adoxq %r9,%r8
-
-  mulxq 8(%rbp),%rax,%r9
-  adcxq %rax,%r8
-  adoxq %r10,%r9
-
-  mulxq 16(%rbp),%rax,%r10
-  adcxq %rax,%r9
-  adoxq %r11,%r10
-
-  mulxq 24(%rbp),%rax,%r11
-  adcxq %rax,%r10
-  adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
-  adcxq %rax,%r11
-  adoxq %r13,%r12
-
-  mulxq 40(%rbp),%rax,%r13
-  adcxq %rax,%r12
-  adoxq %r14,%r13
-
-  mulxq 48(%rbp),%rax,%r14
-  movq %rbx,(%rdi,%rcx,8)
-  movl $0,%ebx
-  adcxq %rax,%r13
-  adoxq %r15,%r14
-
-.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
-  movq 8(%rsi,%rcx,8),%rdx
-  adcxq %rax,%r14
-  adoxq %rbx,%r15
-  adcxq %rbx,%r15
-
-.byte 0x67
-  incq %rcx
-  jnz .Lsqrx8x_loop
-
-  leaq 64(%rbp),%rbp
-  movq $-8,%rcx
-  cmpq 8+8(%rsp),%rbp
-  je .Lsqrx8x_break
-
-  subq 16+8(%rsp),%rbx
-.byte 0x66
-  movq -64(%rsi),%rdx
-  adcxq 0(%rdi),%r8
-  adcxq 8(%rdi),%r9
-  adcq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  leaq 64(%rdi),%rdi
-.byte 0x67
-  sbbq %rax,%rax
-  xorl %ebx,%ebx
-  movq %rax,16+8(%rsp)
-  jmp .Lsqrx8x_loop
-
-.align 32
-.Lsqrx8x_break:
-  xorq %rbp,%rbp
-  subq 16+8(%rsp),%rbx
-  adcxq %rbp,%r8
-  movq 24+8(%rsp),%rcx
-  adcxq %rbp,%r9
-  movq 0(%rsi),%rdx
-  adcq $0,%r10
-  movq %r8,0(%rdi)
-  adcq $0,%r11
-  adcq $0,%r12
-  adcq $0,%r13
-  adcq $0,%r14
-  adcq $0,%r15
-  cmpq %rcx,%rdi
-  je .Lsqrx8x_outer_loop
-
-  movq %r9,8(%rdi)
-  movq 8(%rcx),%r9
-  movq %r10,16(%rdi)
-  movq 16(%rcx),%r10
-  movq %r11,24(%rdi)
-  movq 24(%rcx),%r11
-  movq %r12,32(%rdi)
-  movq 32(%rcx),%r12
-  movq %r13,40(%rdi)
-  movq 40(%rcx),%r13
-  movq %r14,48(%rdi)
-  movq 48(%rcx),%r14
-  movq %r15,56(%rdi)
-  movq 56(%rcx),%r15
-  movq %rcx,%rdi
-  jmp .Lsqrx8x_outer_loop
-
-.align 32
-.Lsqrx8x_outer_break:
-  movq %r9,72(%rdi)
-.byte 102,72,15,126,217
-  movq %r10,80(%rdi)
-  movq %r11,88(%rdi)
-  movq %r12,96(%rdi)
-  movq %r13,104(%rdi)
-  movq %r14,112(%rdi)
-  leaq 48+8(%rsp),%rdi
-  movq (%rsi,%rcx,1),%rdx
-
-  movq 8(%rdi),%r11
-  xorq %r10,%r10
-  movq 0+8(%rsp),%r9
-  adoxq %r11,%r11
-  movq 16(%rdi),%r12
-  movq 24(%rdi),%r13
-
-.align 32
-.Lsqrx4x_shift_n_add:
-  mulxq %rdx,%rax,%rbx
-  adoxq %r12,%r12
-  adcxq %r10,%rax
-.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
-.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
-  adoxq %r13,%r13
-  adcxq %r11,%rbx
-  movq 40(%rdi),%r11
-  movq %rax,0(%rdi)
-  movq %rbx,8(%rdi)
-
-  mulxq %rdx,%rax,%rbx
-  adoxq %r10,%r10
-  adcxq %r12,%rax
-  movq 16(%rsi,%rcx,1),%rdx
-  movq 48(%rdi),%r12
-  adoxq %r11,%r11
-  adcxq %r13,%rbx
-  movq 56(%rdi),%r13
-  movq %rax,16(%rdi)
-  movq %rbx,24(%rdi)
-
-  mulxq %rdx,%rax,%rbx
-  adoxq %r12,%r12
-  adcxq %r10,%rax
-  movq 24(%rsi,%rcx,1),%rdx
-  leaq 32(%rcx),%rcx
-  movq 64(%rdi),%r10
-  adoxq %r13,%r13
-  adcxq %r11,%rbx
-  movq 72(%rdi),%r11
-  movq %rax,32(%rdi)
-  movq %rbx,40(%rdi)
-
-  mulxq %rdx,%rax,%rbx
-  adoxq %r10,%r10
-  adcxq %r12,%rax
-  jrcxz .Lsqrx4x_shift_n_add_break
-.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
-  adoxq %r11,%r11
-  adcxq %r13,%rbx
-  movq 80(%rdi),%r12
-  movq 88(%rdi),%r13
-  movq %rax,48(%rdi)
-  movq %rbx,56(%rdi)
-  leaq 64(%rdi),%rdi
-  nop
-  jmp .Lsqrx4x_shift_n_add
-
-.align 32
-.Lsqrx4x_shift_n_add_break:
-  adcxq %r13,%rbx
-  movq %rax,48(%rdi)
-  movq %rbx,56(%rdi)
-  leaq 64(%rdi),%rdi
-.byte 102,72,15,126,213
-__bn_sqrx8x_reduction:
-  xorl %eax,%eax
-  movq 32+8(%rsp),%rbx
-  movq 48+8(%rsp),%rdx
-  leaq -64(%rbp,%r9,1),%rcx
-
-  movq %rcx,0+8(%rsp)
-  movq %rdi,8+8(%rsp)
-
-  leaq 48+8(%rsp),%rdi
-  jmp .Lsqrx8x_reduction_loop
-
-.align 32
-.Lsqrx8x_reduction_loop:
-  movq 8(%rdi),%r9
-  movq 16(%rdi),%r10
-  movq 24(%rdi),%r11
-  movq 32(%rdi),%r12
-  movq %rdx,%r8
-  imulq %rbx,%rdx
-  movq 40(%rdi),%r13
-  movq 48(%rdi),%r14
-  movq 56(%rdi),%r15
-  movq %rax,24+8(%rsp)
-
-  leaq 64(%rdi),%rdi
-  xorq %rsi,%rsi
-  movq $-8,%rcx
-  jmp .Lsqrx8x_reduce
-
-.align 32
-.Lsqrx8x_reduce:
-  movq %r8,%rbx
-  mulxq 0(%rbp),%rax,%r8
-  adcxq %rbx,%rax
-  adoxq %r9,%r8
-
-  mulxq 8(%rbp),%rbx,%r9
-  adcxq %rbx,%r8
-  adoxq %r10,%r9
-
-  mulxq 16(%rbp),%rbx,%r10
-  adcxq %rbx,%r9
-  adoxq %r11,%r10
-
-  mulxq 24(%rbp),%rbx,%r11
-  adcxq %rbx,%r10
-  adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
-  movq %rdx,%rax
-  movq %r8,%rdx
-  adcxq %rbx,%r11
-  adoxq %r13,%r12
-
-  mulxq 32+8(%rsp),%rbx,%rdx
-  movq %rax,%rdx
-  movq %rax,64+48+8(%rsp,%rcx,8)
-
-  mulxq 40(%rbp),%rax,%r13
-  adcxq %rax,%r12
-  adoxq %r14,%r13
-
-  mulxq 48(%rbp),%rax,%r14
-  adcxq %rax,%r13
-  adoxq %r15,%r14
-
-  mulxq 56(%rbp),%rax,%r15
-  movq %rbx,%rdx
-  adcxq %rax,%r14
-  adoxq %rsi,%r15
-  adcxq %rsi,%r15
-
-.byte 0x67,0x67,0x67
-  incq %rcx
-  jnz .Lsqrx8x_reduce
-
-  movq %rsi,%rax
-  cmpq 0+8(%rsp),%rbp
-  jae .Lsqrx8x_no_tail
-
-  movq 48+8(%rsp),%rdx
-  addq 0(%rdi),%r8
-  leaq 64(%rbp),%rbp
-  movq $-8,%rcx
-  adcxq 8(%rdi),%r9
-  adcxq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  leaq 64(%rdi),%rdi
-  sbbq %rax,%rax
-
-  xorq %rsi,%rsi
-  movq %rax,16+8(%rsp)
-  jmp .Lsqrx8x_tail
-
-.align 32
-.Lsqrx8x_tail:
-  movq %r8,%rbx
-  mulxq 0(%rbp),%rax,%r8
-  adcxq %rax,%rbx
-  adoxq %r9,%r8
-
-  mulxq 8(%rbp),%rax,%r9
-  adcxq %rax,%r8
-  adoxq %r10,%r9
-
-  mulxq 16(%rbp),%rax,%r10
-  adcxq %rax,%r9
-  adoxq %r11,%r10
-
-  mulxq 24(%rbp),%rax,%r11
-  adcxq %rax,%r10
-  adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
-  adcxq %rax,%r11
-  adoxq %r13,%r12
-
-  mulxq 40(%rbp),%rax,%r13
-  adcxq %rax,%r12
-  adoxq %r14,%r13
-
-  mulxq 48(%rbp),%rax,%r14
-  adcxq %rax,%r13
-  adoxq %r15,%r14
-
-  mulxq 56(%rbp),%rax,%r15
-  movq 72+48+8(%rsp,%rcx,8),%rdx
-  adcxq %rax,%r14
-  adoxq %rsi,%r15
-  movq %rbx,(%rdi,%rcx,8)
-  movq %r8,%rbx
-  adcxq %rsi,%r15
-
-  incq %rcx
-  jnz .Lsqrx8x_tail
-
-  cmpq 0+8(%rsp),%rbp
-  jae .Lsqrx8x_tail_done
-
-  subq 16+8(%rsp),%rsi
-  movq 48+8(%rsp),%rdx
-  leaq 64(%rbp),%rbp
-  adcq 0(%rdi),%r8
-  adcq 8(%rdi),%r9
-  adcq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  leaq 64(%rdi),%rdi
-  sbbq %rax,%rax
-  subq $8,%rcx
-
-  xorq %rsi,%rsi
-  movq %rax,16+8(%rsp)
-  jmp .Lsqrx8x_tail
-
-.align 32
-.Lsqrx8x_tail_done:
-  xorq %rax,%rax
-  addq 24+8(%rsp),%r8
-  adcq $0,%r9
-  adcq $0,%r10
-  adcq $0,%r11
-  adcq $0,%r12
-  adcq $0,%r13
-  adcq $0,%r14
-  adcq $0,%r15
-  adcq $0,%rax
-
-  subq 16+8(%rsp),%rsi
-.Lsqrx8x_no_tail:
-  adcq 0(%rdi),%r8
-.byte 102,72,15,126,217
-  adcq 8(%rdi),%r9
-  movq 56(%rbp),%rsi
-.byte 102,72,15,126,213
-  adcq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  adcq $0,%rax
-
-  movq 32+8(%rsp),%rbx
-  movq 64(%rdi,%rcx,1),%rdx
-
-  movq %r8,0(%rdi)
-  leaq 64(%rdi),%r8
-  movq %r9,8(%rdi)
-  movq %r10,16(%rdi)
-  movq %r11,24(%rdi)
-  movq %r12,32(%rdi)
-  movq %r13,40(%rdi)
-  movq %r14,48(%rdi)
-  movq %r15,56(%rdi)
-
-  leaq 64(%rdi,%rcx,1),%rdi
-  cmpq 8+8(%rsp),%r8
-  jb .Lsqrx8x_reduction_loop
-  .byte 0xf3,0xc3
+  ret
 .cfi_endproc
-.size  bn_sqrx8x_internal,.-bn_sqrx8x_internal
+.size  main,.-main

@maksfb maksfb merged commit c67ecf3 into llvm:main Jun 17, 2024
6 of 7 checks passed
@maksfb maksfb deleted the gh-fix-jrcxz-test branch June 18, 2024 19:04
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants