Commit 1f2b32a

blake2b: fix amd64 assembly not to smash SP

For golang/go#44269.

Change-Id: I7e405afd0b55c96ce0a4c6058ba01e8be1173a8c
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/292051
Trust: Russ Cox <[email protected]>
Trust: Jason A. Donenfeld <[email protected]>
Reviewed-by: Jason A. Donenfeld <[email protected]>

1 parent 042588c commit 1f2b32a
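
The change itself is mechanical. Previously each routine rounded SP up to an aligned address, installed that value into SP for the duration of the function, and restored the saved SP before RET; Go assembly that repoints SP away from its own frame interferes with the runtime's stack handling (golang/go#44269 has the background). The fix leaves SP untouched and keeps the aligned scratch-area pointer in a spare register instead, rewriting every former n(SP) scratch reference against that register. A minimal sketch of the AVX2 path's shape, paraphrased from the diff below (not a literal excerpt):

	// Before: SP was re-aligned ("smashed") and restored on exit.
	MOVQ SP, DX        // save the real SP
	MOVQ SP, R9
	ADDQ $31, R9
	ANDQ $~31, R9      // round up to a 32-byte boundary
	MOVQ R9, SP        // clobber SP with the aligned value
	// ... scratch accesses through n(SP) ...
	MOVQ DX, SP        // restore SP before RET

	// After: SP is never modified; DX holds the aligned scratch base.
	MOVQ SP, DX
	ADDQ $31, DX
	ANDQ $~31, DX      // DX = 32-byte-aligned pointer into the local frame
	// ... the same scratch accesses, now through n(DX) ...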

2 files changed: 70 additions and 79 deletions

blake2b/blake2bAVX2_amd64.s

Lines changed: 44 additions & 50 deletions
@@ -282,14 +282,12 @@ TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
 	MOVQ blocks_len+32(FP), DI

 	MOVQ SP, DX
-	MOVQ SP, R9
-	ADDQ $31, R9
-	ANDQ $~31, R9
-	MOVQ R9, SP
+	ADDQ $31, DX
+	ANDQ $~31, DX

-	MOVQ CX, 16(SP)
+	MOVQ CX, 16(DX)
 	XORQ CX, CX
-	MOVQ CX, 24(SP)
+	MOVQ CX, 24(DX)

 	VMOVDQU ·AVX2_c40<>(SB), Y4
 	VMOVDQU ·AVX2_c48<>(SB), Y5
@@ -301,33 +299,33 @@ TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment

 	MOVQ 0(BX), R8
 	MOVQ 8(BX), R9
-	MOVQ R9, 8(SP)
+	MOVQ R9, 8(DX)

 loop:
 	ADDQ $128, R8
-	MOVQ R8, 0(SP)
+	MOVQ R8, 0(DX)
 	CMPQ R8, $128
 	JGE noinc
 	INCQ R9
-	MOVQ R9, 8(SP)
+	MOVQ R9, 8(DX)

 noinc:
 	VMOVDQA Y8, Y0
 	VMOVDQA Y9, Y1
 	VMOVDQA Y6, Y2
-	VPXOR 0(SP), Y7, Y3
+	VPXOR 0(DX), Y7, Y3

 	LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
-	VMOVDQA Y12, 32(SP)
-	VMOVDQA Y13, 64(SP)
-	VMOVDQA Y14, 96(SP)
-	VMOVDQA Y15, 128(SP)
+	VMOVDQA Y12, 32(DX)
+	VMOVDQA Y13, 64(DX)
+	VMOVDQA Y14, 96(DX)
+	VMOVDQA Y15, 128(DX)
 	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
 	LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
-	VMOVDQA Y12, 160(SP)
-	VMOVDQA Y13, 192(SP)
-	VMOVDQA Y14, 224(SP)
-	VMOVDQA Y15, 256(SP)
+	VMOVDQA Y12, 160(DX)
+	VMOVDQA Y13, 192(DX)
+	VMOVDQA Y14, 224(DX)
+	VMOVDQA Y15, 256(DX)

 	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
 	LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
@@ -347,8 +345,8 @@ noinc:
 	LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
 	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)

-	ROUND_AVX2(32(SP), 64(SP), 96(SP), 128(SP), Y10, Y4, Y5)
-	ROUND_AVX2(160(SP), 192(SP), 224(SP), 256(SP), Y10, Y4, Y5)
+	ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5)
+	ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5)

 	VPXOR Y0, Y8, Y8
 	VPXOR Y1, Y9, Y9
@@ -366,7 +364,6 @@ noinc:
 	VMOVDQU Y9, 32(AX)
 	VZEROUPPER

-	MOVQ DX, SP
 	RET

 #define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
@@ -584,20 +581,18 @@ TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
 	MOVQ blocks_base+24(FP), SI
 	MOVQ blocks_len+32(FP), DI

-	MOVQ SP, BP
-	MOVQ SP, R9
-	ADDQ $15, R9
-	ANDQ $~15, R9
-	MOVQ R9, SP
+	MOVQ SP, R10
+	ADDQ $15, R10
+	ANDQ $~15, R10

 	VMOVDQU ·AVX_c40<>(SB), X0
 	VMOVDQU ·AVX_c48<>(SB), X1
 	VMOVDQA X0, X8
 	VMOVDQA X1, X9

 	VMOVDQU ·AVX_iv3<>(SB), X0
-	VMOVDQA X0, 0(SP)
-	XORQ CX, 0(SP) // 0(SP) = ·AVX_iv3 ^ (CX || 0)
+	VMOVDQA X0, 0(R10)
+	XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0)

 	VMOVDQU 0(AX), X10
 	VMOVDQU 16(AX), X11
@@ -624,35 +619,35 @@ noinc:
 	VMOVDQU ·AVX_iv2<>(SB), X6

 	VPXOR X15, X6, X6
-	VMOVDQA 0(SP), X7
+	VMOVDQA 0(R10), X7

 	LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
-	VMOVDQA X12, 16(SP)
-	VMOVDQA X13, 32(SP)
-	VMOVDQA X14, 48(SP)
-	VMOVDQA X15, 64(SP)
+	VMOVDQA X12, 16(R10)
+	VMOVDQA X13, 32(R10)
+	VMOVDQA X14, 48(R10)
+	VMOVDQA X15, 64(R10)
 	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
 	SHUFFLE_AVX()
 	LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
-	VMOVDQA X12, 80(SP)
-	VMOVDQA X13, 96(SP)
-	VMOVDQA X14, 112(SP)
-	VMOVDQA X15, 128(SP)
+	VMOVDQA X12, 80(R10)
+	VMOVDQA X13, 96(R10)
+	VMOVDQA X14, 112(R10)
+	VMOVDQA X15, 128(R10)
 	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
 	SHUFFLE_AVX_INV()

 	LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
-	VMOVDQA X12, 144(SP)
-	VMOVDQA X13, 160(SP)
-	VMOVDQA X14, 176(SP)
-	VMOVDQA X15, 192(SP)
+	VMOVDQA X12, 144(R10)
+	VMOVDQA X13, 160(R10)
+	VMOVDQA X14, 176(R10)
+	VMOVDQA X15, 192(R10)
 	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
 	SHUFFLE_AVX()
 	LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
-	VMOVDQA X12, 208(SP)
-	VMOVDQA X13, 224(SP)
-	VMOVDQA X14, 240(SP)
-	VMOVDQA X15, 256(SP)
+	VMOVDQA X12, 208(R10)
+	VMOVDQA X13, 224(R10)
+	VMOVDQA X14, 240(R10)
+	VMOVDQA X15, 256(R10)
 	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
 	SHUFFLE_AVX_INV()

@@ -712,14 +707,14 @@ noinc:
 	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
 	SHUFFLE_AVX_INV()

-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X15, X8, X9)
+	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9)
 	SHUFFLE_AVX()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X15, X8, X9)
+	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9)
 	SHUFFLE_AVX_INV()

-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X15, X8, X9)
+	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9)
 	SHUFFLE_AVX()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X15, X8, X9)
+	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9)
 	SHUFFLE_AVX_INV()

 	VMOVDQU 32(AX), X14
@@ -746,5 +741,4 @@ noinc:
 	MOVQ R9, 8(BX)
 	VZEROUPPER

-	MOVQ BP, SP
 	RET
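
The hashBlocksAVX hunks above and the hashBlocksSSE4 hunks in blake2b_amd64.s below apply the same idea with two differences visible in the diff: the scratch area only needs 16-byte alignment, and the aligned pointer lives in R10, so the old save/restore of SP through BP (Go's amd64 frame-pointer register) disappears entirely. Shape of that change, again paraphrased rather than quoted:

	// Before
	MOVQ SP, BP        // save SP in BP
	MOVQ SP, R9
	ADDQ $15, R9
	ANDQ $~15, R9      // round up to a 16-byte boundary
	MOVQ R9, SP        // clobber SP
	// ... scratch accesses through n(SP) ...
	MOVQ BP, SP        // restore before RET

	// After: neither SP nor BP is touched.
	MOVQ SP, R10
	ADDQ $15, R10
	ANDQ $~15, R10     // R10 = 16-byte-aligned scratch base
	// ... scratch accesses through n(R10) ...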

blake2b/blake2b_amd64.s

Lines changed: 26 additions & 29 deletions
@@ -118,15 +118,13 @@ TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
 	MOVQ blocks_base+24(FP), SI
 	MOVQ blocks_len+32(FP), DI

-	MOVQ SP, BP
-	MOVQ SP, R9
-	ADDQ $15, R9
-	ANDQ $~15, R9
-	MOVQ R9, SP
+	MOVQ SP, R10
+	ADDQ $15, R10
+	ANDQ $~15, R10

 	MOVOU ·iv3<>(SB), X0
-	MOVO X0, 0(SP)
-	XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0)
+	MOVO X0, 0(R10)
+	XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0)

 	MOVOU ·c40<>(SB), X13
 	MOVOU ·c48<>(SB), X14
@@ -156,35 +154,35 @@ noinc:
 	MOVOU ·iv2<>(SB), X6

 	PXOR X8, X6
-	MOVO 0(SP), X7
+	MOVO 0(R10), X7

 	LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
-	MOVO X8, 16(SP)
-	MOVO X9, 32(SP)
-	MOVO X10, 48(SP)
-	MOVO X11, 64(SP)
+	MOVO X8, 16(R10)
+	MOVO X9, 32(R10)
+	MOVO X10, 48(R10)
+	MOVO X11, 64(R10)
 	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
 	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
 	LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
-	MOVO X8, 80(SP)
-	MOVO X9, 96(SP)
-	MOVO X10, 112(SP)
-	MOVO X11, 128(SP)
+	MOVO X8, 80(R10)
+	MOVO X9, 96(R10)
+	MOVO X10, 112(R10)
+	MOVO X11, 128(R10)
 	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
 	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

 	LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
-	MOVO X8, 144(SP)
-	MOVO X9, 160(SP)
-	MOVO X10, 176(SP)
-	MOVO X11, 192(SP)
+	MOVO X8, 144(R10)
+	MOVO X9, 160(R10)
+	MOVO X10, 176(R10)
+	MOVO X11, 192(R10)
 	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
 	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
 	LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
-	MOVO X8, 208(SP)
-	MOVO X9, 224(SP)
-	MOVO X10, 240(SP)
-	MOVO X11, 256(SP)
+	MOVO X8, 208(R10)
+	MOVO X9, 224(R10)
+	MOVO X10, 240(R10)
+	MOVO X11, 256(R10)
 	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
 	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

@@ -244,14 +242,14 @@ noinc:
 	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
 	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14)
+	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
 	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14)
+	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
 	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14)
+	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
 	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14)
+	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
 	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

 	MOVOU 32(AX), X10
@@ -277,5 +275,4 @@ noinc:
 	MOVQ R8, 0(BX)
 	MOVQ R9, 8(BX)

-	MOVQ BP, SP
 	RET
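
These routines back the package's public hashing API, so ordinary use of golang.org/x/crypto/blake2b exercises whichever of the AVX2, AVX, or SSE4 code paths the CPU supports. A small usage sketch (not part of the commit; assumes the module is available as a dependency):

	package main

	import (
		"fmt"

		"golang.org/x/crypto/blake2b"
	)

	func main() {
		// On amd64, Sum512 dispatches internally to hashBlocksAVX2,
		// hashBlocksAVX, or hashBlocksSSE4 based on detected CPU features.
		sum := blake2b.Sum512([]byte("hello world"))
		fmt.Printf("%x\n", sum)
	}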
