Skip to content

Add CFI directives to armv8-mont #2584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions crypto/fipsmodule/bn/asm/armv8-mont.pl
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,28 @@
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.cfi_startproc
AARCH64_SIGN_LINK_REGISTER
tst $num,#7
b.eq __bn_sqr8x_mont
tst $num,#3
b.eq __bn_mul4x_mont
.Lmul_mont:
stp x29,x30,[sp,#-64]!
.cfi_def_cfa_offset 64
.cfi_offset x29, -64
.cfi_offset x30, -56
add x29,sp,#0
.cfi_def_cfa x29, 64
stp x19,x20,[sp,#16]
.cfi_offset x19, -48
.cfi_offset x20, -40
stp x21,x22,[sp,#32]
.cfi_offset x21, -32
.cfi_offset x22, -24
stp x23,x24,[sp,#48]
.cfi_offset x23, -16
.cfi_offset x24, -8
ldr $m0,[$bp],#8 // bp[0]
sub $tp,sp,$num,lsl#3
Expand Down Expand Up @@ -269,13 +280,23 @@
str $nj,[$rp,#-8]
ldp x19,x20,[x29,#16]
.cfi_restore x19
.cfi_restore x20
mov sp,x29
.cfi_def_cfa sp, 64
ldp x21,x22,[x29,#32]
.cfi_restore x21
.cfi_restore x22
mov x0,#1
ldp x23,x24,[x29,#48]
.cfi_restore x23
.cfi_restore x24
ldr x29,[sp],#64
.cfi_restore x29
.cfi_def_cfa_offset 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see x30 restored?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh that is weird, it looks like bn_mul_mont stores x30, never changes/restores it, and can calls ret to that original x30 address. It also calls __bn_sqr8x_mont/__bn_mul4x_mont which also store x30, and they restore it before returning. But those functions also never use/change x30. So it seems like I should load it in bn_mul_mont before returning, or should I update everything to not save it on the stack at all?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out x30 is modified in the 4x and 8x functions but not in the top level bn_mul_mont, I was looking at uses in the perl code and missed my ($cnt,$carry,$topmost)=("x27","x28","x30");in the 8x and 4x versions. Looking at armv8-mont.S it is clear x30 is modified during execution in the

I did an experiment to remove the store x30 in the top level bn_mul_mont but that caused all the uses of it to fail. I may have messed up the store/restore logic. I tried to remove the store x30 and shift all the other stores over by 8 bytes

bn_mul_mont:
.cfi_startproc
	AARCH64_SIGN_LINK_REGISTER
	tst	$num,#7
	b.eq	__bn_sqr8x_mont
	tst	$num,#3
	b.eq	__bn_mul4x_mont
.Lmul_mont:
	stp	x29,x19,[sp,#-56]!
.cfi_def_cfa_offset 56
.cfi_offset x19, -48
.cfi_offset x29, -56
	add	x29,sp,#0
.cfi_def_cfa x29, 56
	stp	x20,x21,[sp,#16]
.cfi_offset x20, -40
.cfi_offset x21, -32
	stp	x22,x23,[sp,#32]
.cfi_offset x22, -24
.cfi_offset x23, -16
	str	x24,[sp,#48]
.cfi_offset x24, -8

...

	ldp	x20,x21,[x29,#16]
.cfi_restore x20
.cfi_restore x21
	mov	sp,x29
.cfi_def_cfa sp, 56
	ldp	x22,x23,[x29,#32]
.cfi_restore x22
.cfi_restore x23
	mov	x0,#1
	ldr	x24,[x29,#48]
.cfi_restore x24
	ldp	x29,x19,[sp],#56
.cfi_restore x19
.cfi_restore x29
.cfi_def_cfa_offset 0
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.cfi_endproc
.size	bn_mul_mont,.-bn_mul_mont

AARCH64_VALIDATE_LINK_REGISTER
ret
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
___
{
Expand All @@ -292,18 +313,33 @@
.type __bn_sqr8x_mont,%function
.align 5
__bn_sqr8x_mont:
.cfi_startproc
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp $ap,$bp
b.ne __bn_mul4x_mont
.Lsqr8x_mont:
stp x29,x30,[sp,#-128]!
.cfi_def_cfa_offset 128
.cfi_offset x29, -128
.cfi_offset x30, -120
add x29,sp,#0
.cfi_def_cfa x29, 128
stp x19,x20,[sp,#16]
.cfi_offset x19, -112
.cfi_offset x20, -104
stp x21,x22,[sp,#32]
.cfi_offset x21, -96
.cfi_offset x22, -88
stp x23,x24,[sp,#48]
.cfi_offset x23, -80
.cfi_offset x24, -72
stp x25,x26,[sp,#64]
.cfi_offset x25, -64
.cfi_offset x26, -56
stp x27,x28,[sp,#80]
.cfi_offset x27, -48
.cfi_offset x28, -40
stp $rp,$np,[sp,#96] // offload rp and np
ldp $a0,$a1,[$ap,#8*0]
Expand Down Expand Up @@ -1007,6 +1043,7 @@
.Lsqr8x8_post_condition:
adc $carry,xzr,xzr
ldr x30,[x29,#8] // pull return address
.cfi_restore x30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not related to your change but this ldr is weirdly placed. shouldn't it be after the .Lsqr8x_done label?

// $acc0-7,$carry hold result, $a0-7 hold modulus
subs $a0,$acc0,$a0
ldr $ap,[x29,#96] // pull rp
Expand Down Expand Up @@ -1043,16 +1080,29 @@
.Lsqr8x_done:
ldp x19,x20,[x29,#16]
.cfi_restore x19
.cfi_restore x20
mov sp,x29
ldp x21,x22,[x29,#32]
.cfi_restore x21
.cfi_restore x22
mov x0,#1
ldp x23,x24,[x29,#48]
.cfi_restore x23
.cfi_restore x24
ldp x25,x26,[x29,#64]
.cfi_restore x25
.cfi_restore x26
ldp x27,x28,[x29,#80]
.cfi_restore x27
.cfi_restore x28
ldr x29,[sp],#128
.cfi_restore x29
.cfi_def_cfa_offset 0
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.cfi_endproc
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
___
}
Expand All @@ -1075,16 +1125,31 @@
.type __bn_mul4x_mont,%function
.align 5
__bn_mul4x_mont:
.cfi_startproc
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
.cfi_def_cfa_offset 128
.cfi_offset x30, -120
.cfi_offset x29, -128
add x29,sp,#0
.cfi_def_cfa x29, 128
stp x19,x20,[sp,#16]
.cfi_offset x19, -112
.cfi_offset x20, -104
stp x21,x22,[sp,#32]
.cfi_offset x21, -96
.cfi_offset x22, -88
stp x23,x24,[sp,#48]
.cfi_offset x23, -80
.cfi_offset x24, -72
stp x25,x26,[sp,#64]
.cfi_offset x25, -64
.cfi_offset x26, -56
stp x27,x28,[sp,#80]
.cfi_offset x27, -48
.cfi_offset x28, -40
sub $tp,sp,$num,lsl#3
lsl $num,$num,#3
Expand Down Expand Up @@ -1485,6 +1550,7 @@
// $acc0-3,$carry hold result, $m0-7 hold modulus
subs $a0,$acc0,$m0
ldr x30,[x29,#8] // pull return address
.cfi_restore x30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above, seems to me this load is in a weird spot?

sbcs $a1,$acc1,$m1
stp xzr,xzr,[sp,#8*0]
sbcs $a2,$acc2,$m2
Expand All @@ -1504,16 +1570,29 @@
.Lmul4x_done:
ldp x19,x20,[x29,#16]
.cfi_restore x19
.cfi_restore x20
mov sp,x29
ldp x21,x22,[x29,#32]
.cfi_restore x21
.cfi_restore x22
mov x0,#1
ldp x23,x24,[x29,#48]
.cfi_restore x23
.cfi_restore x24
ldp x25,x26,[x29,#64]
.cfi_restore x25
.cfi_restore x26
ldp x27,x28,[x29,#80]
.cfi_restore x27
.cfi_restore x28
ldr x29,[sp],#128
.cfi_restore x29
.cfi_def_cfa_offset 0
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.cfi_endproc
.size __bn_mul4x_mont,.-__bn_mul4x_mont
___
}
Expand Down
6 changes: 5 additions & 1 deletion crypto/fipsmodule/bn/asm/bn-armv8.pl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
.globl bn_add_words
.align 4
bn_add_words:
.cfi_startproc
AARCH64_VALID_CALL_TARGET
# Clear the carry flag.
cmn xzr, xzr
Expand Down Expand Up @@ -72,6 +73,7 @@
.Ladd_exit:
cset x0, cs
ret
.cfi_endproc
.size bn_add_words,.-bn_add_words
// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
Expand All @@ -80,6 +82,7 @@
.globl bn_sub_words
.align 4
bn_sub_words:
.cfi_startproc
AARCH64_VALID_CALL_TARGET
# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
# so we want C = 1 here.
Expand Down Expand Up @@ -111,7 +114,8 @@
.Lsub_exit:
cset x0, cc
ret
size bn_sub_words,.-bn_sub_words
.cfi_endproc
.size bn_sub_words,.-bn_sub_words
____

print $code;
Expand Down
Loading
Loading