Skip to content

Commit f99511d

Browse files
laboger authored and FiloSottile committed
crypto/internal/nistec: re-enable ppc64le asm for P-256
Add support for ppc64le assembler to p256. Most of the changes are due to the change in nistec interfaces.

There is a change to p256MovCond based on a reviewer's comment. LXVD2X replaces the use of LXVW4X in one function.

In addition, some refactoring has been done to this file to reduce size and improve readability:

- Eliminate the use of defines to switch between V and VSX registers. V regs can be used for some instructions that previously required VSX.
- Use XXPERMDI instead of VPERM to swap bytes loaded and stored with LXVD2X and STXVD2X instructions. This eliminates the need to load the byte swap string into a vector.
- Use VMRGEW and VMRGOW instead of VPERM in the VMULT macros. This also avoids the need to load byte strings to swap the high and low values.

These changes reduce the file by about 10% and show an improvement of about 2% at runtime.

For #52182

Change-Id: Ic48050fc81bb273b7b4023e54864f4255dcc2a4f
Reviewed-on: https://go-review.googlesource.com/c/go/+/399755
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: David Chase <[email protected]>
Reviewed-by: Filippo Valsorda <[email protected]>
Run-TryBot: Filippo Valsorda <[email protected]>
Reviewed-by: Filippo Valsorda <[email protected]>
Reviewed-by: Paul Murphy <[email protected]>
1 parent 86536b9 commit f99511d

File tree

8 files changed

+521
-1311
lines changed

8 files changed

+521
-1311
lines changed

src/crypto/internal/nistec/generate.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ var curves = []struct {
4040
P: "P256",
4141
Element: "fiat.P256Element",
4242
Params: elliptic.P256().Params(),
43-
BuildTags: "!amd64 && !arm64",
43+
BuildTags: "!amd64 && !arm64 && !ppc64le",
4444
},
4545
{
4646
P: "P384",

src/crypto/internal/nistec/p256.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/crypto/internal/nistec/p256_asm.go

Lines changed: 1 addition & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
1111
// https://eprint.iacr.org/2013/816.pdf
1212

13-
//go:build amd64 || arm64
13+
//go:build amd64 || arm64 || ppc64le
1414

1515
package nistec
1616

@@ -355,98 +355,6 @@ func p256PointDoubleAsm(res, in *P256Point)
355355
// Montgomery domain (with R = 2²⁵⁶) as four uint64 limbs in little-endian order.
356356
type p256OrdElement [4]uint64
357357

358-
// Montgomery multiplication modulo ord(G). Sets res = in1 * in2 * R⁻¹.
359-
//
360-
//go:noescape
361-
func p256OrdMul(res, in1, in2 *p256OrdElement)
362-
363-
// Montgomery square modulo ord(G), repeated n times (n >= 1).
364-
//
365-
//go:noescape
366-
func p256OrdSqr(res, in *p256OrdElement, n int)
367-
368-
func P256OrdInverse(k []byte) ([]byte, error) {
369-
if len(k) != 32 {
370-
return nil, errors.New("invalid scalar length")
371-
}
372-
373-
x := new(p256OrdElement)
374-
p256OrdBigToLittle(x, (*[32]byte)(k))
375-
376-
// Inversion is implemented as exponentiation by n - 2, per Fermat's little theorem.
377-
//
378-
// The sequence of 38 multiplications and 254 squarings is derived from
379-
// https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
380-
_1 := new(p256OrdElement)
381-
_11 := new(p256OrdElement)
382-
_101 := new(p256OrdElement)
383-
_111 := new(p256OrdElement)
384-
_1111 := new(p256OrdElement)
385-
_10101 := new(p256OrdElement)
386-
_101111 := new(p256OrdElement)
387-
t := new(p256OrdElement)
388-
389-
// This code operates in the Montgomery domain where R = 2²⁵⁶ mod n and n is
390-
// the order of the scalar field. Elements in the Montgomery domain take the
391-
// form a×R and p256OrdMul calculates (a × b × R⁻¹) mod n. RR is R in the
392-
// domain, or R×R mod n, thus p256OrdMul(x, RR) gives x×R, i.e. converts x
393-
// into the Montgomery domain.
394-
RR := &p256OrdElement{0x83244c95be79eea2, 0x4699799c49bd6fa6,
395-
0x2845b2392b6bec59, 0x66e12d94f3d95620}
396-
397-
p256OrdMul(_1, x, RR) // _1
398-
p256OrdSqr(x, _1, 1) // _10
399-
p256OrdMul(_11, x, _1) // _11
400-
p256OrdMul(_101, x, _11) // _101
401-
p256OrdMul(_111, x, _101) // _111
402-
p256OrdSqr(x, _101, 1) // _1010
403-
p256OrdMul(_1111, _101, x) // _1111
404-
405-
p256OrdSqr(t, x, 1) // _10100
406-
p256OrdMul(_10101, t, _1) // _10101
407-
p256OrdSqr(x, _10101, 1) // _101010
408-
p256OrdMul(_101111, _101, x) // _101111
409-
p256OrdMul(x, _10101, x) // _111111 = x6
410-
p256OrdSqr(t, x, 2) // _11111100
411-
p256OrdMul(t, t, _11) // _11111111 = x8
412-
p256OrdSqr(x, t, 8) // _ff00
413-
p256OrdMul(x, x, t) // _ffff = x16
414-
p256OrdSqr(t, x, 16) // _ffff0000
415-
p256OrdMul(t, t, x) // _ffffffff = x32
416-
417-
p256OrdSqr(x, t, 64)
418-
p256OrdMul(x, x, t)
419-
p256OrdSqr(x, x, 32)
420-
p256OrdMul(x, x, t)
421-
422-
sqrs := []int{
423-
6, 5, 4, 5, 5,
424-
4, 3, 3, 5, 9,
425-
6, 2, 5, 6, 5,
426-
4, 5, 5, 3, 10,
427-
2, 5, 5, 3, 7, 6}
428-
muls := []*p256OrdElement{
429-
_101111, _111, _11, _1111, _10101,
430-
_101, _101, _101, _111, _101111,
431-
_1111, _1, _1, _1111, _111,
432-
_111, _111, _101, _11, _101111,
433-
_11, _11, _11, _1, _10101, _1111}
434-
435-
for i, s := range sqrs {
436-
p256OrdSqr(x, x, s)
437-
p256OrdMul(x, x, muls[i])
438-
}
439-
440-
// Montgomery multiplication by R⁻¹, or 1 outside the domain as R⁻¹×R = 1,
441-
// converts a Montgomery value out of the domain.
442-
one := &p256OrdElement{1}
443-
p256OrdMul(x, x, one)
444-
445-
var xOut [32]byte
446-
p256OrdLittleToBig(&xOut, x)
447-
return xOut[:], nil
448-
}
449-
450358
// Add sets q = r1 + r2, and returns q. The points may overlap.
451359
func (q *P256Point) Add(r1, r2 *P256Point) *P256Point {
452360
var sum, double P256Point
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright 2022 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build amd64 || arm64
6+
7+
package nistec
8+
9+
import "errors"
10+
11+
// Montgomery multiplication modulo ord(G). Sets res = in1 * in2 * R⁻¹.
12+
//
13+
//go:noescape
14+
func p256OrdMul(res, in1, in2 *p256OrdElement)
15+
16+
// Montgomery square modulo ord(G), repeated n times (n >= 1).
17+
//
18+
//go:noescape
19+
func p256OrdSqr(res, in *p256OrdElement, n int)
20+
21+
func P256OrdInverse(k []byte) ([]byte, error) {
22+
if len(k) != 32 {
23+
return nil, errors.New("invalid scalar length")
24+
}
25+
26+
x := new(p256OrdElement)
27+
p256OrdBigToLittle(x, (*[32]byte)(k))
28+
29+
// Inversion is implemented as exponentiation by n - 2, per Fermat's little theorem.
30+
//
31+
// The sequence of 38 multiplications and 254 squarings is derived from
32+
// https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
33+
_1 := new(p256OrdElement)
34+
_11 := new(p256OrdElement)
35+
_101 := new(p256OrdElement)
36+
_111 := new(p256OrdElement)
37+
_1111 := new(p256OrdElement)
38+
_10101 := new(p256OrdElement)
39+
_101111 := new(p256OrdElement)
40+
t := new(p256OrdElement)
41+
42+
// This code operates in the Montgomery domain where R = 2²⁵⁶ mod n and n is
43+
// the order of the scalar field. Elements in the Montgomery domain take the
44+
// form a×R and p256OrdMul calculates (a × b × R⁻¹) mod n. RR is R in the
45+
// domain, or R×R mod n, thus p256OrdMul(x, RR) gives x×R, i.e. converts x
46+
// into the Montgomery domain.
47+
RR := &p256OrdElement{0x83244c95be79eea2, 0x4699799c49bd6fa6,
48+
0x2845b2392b6bec59, 0x66e12d94f3d95620}
49+
50+
p256OrdMul(_1, x, RR) // _1
51+
p256OrdSqr(x, _1, 1) // _10
52+
p256OrdMul(_11, x, _1) // _11
53+
p256OrdMul(_101, x, _11) // _101
54+
p256OrdMul(_111, x, _101) // _111
55+
p256OrdSqr(x, _101, 1) // _1010
56+
p256OrdMul(_1111, _101, x) // _1111
57+
58+
p256OrdSqr(t, x, 1) // _10100
59+
p256OrdMul(_10101, t, _1) // _10101
60+
p256OrdSqr(x, _10101, 1) // _101010
61+
p256OrdMul(_101111, _101, x) // _101111
62+
p256OrdMul(x, _10101, x) // _111111 = x6
63+
p256OrdSqr(t, x, 2) // _11111100
64+
p256OrdMul(t, t, _11) // _11111111 = x8
65+
p256OrdSqr(x, t, 8) // _ff00
66+
p256OrdMul(x, x, t) // _ffff = x16
67+
p256OrdSqr(t, x, 16) // _ffff0000
68+
p256OrdMul(t, t, x) // _ffffffff = x32
69+
70+
p256OrdSqr(x, t, 64)
71+
p256OrdMul(x, x, t)
72+
p256OrdSqr(x, x, 32)
73+
p256OrdMul(x, x, t)
74+
75+
sqrs := []int{
76+
6, 5, 4, 5, 5,
77+
4, 3, 3, 5, 9,
78+
6, 2, 5, 6, 5,
79+
4, 5, 5, 3, 10,
80+
2, 5, 5, 3, 7, 6}
81+
muls := []*p256OrdElement{
82+
_101111, _111, _11, _1111, _10101,
83+
_101, _101, _101, _111, _101111,
84+
_1111, _1, _1, _1111, _111,
85+
_111, _111, _101, _11, _101111,
86+
_11, _11, _11, _1, _10101, _1111}
87+
88+
for i, s := range sqrs {
89+
p256OrdSqr(x, x, s)
90+
p256OrdMul(x, x, muls[i])
91+
}
92+
93+
// Montgomery multiplication by R⁻¹, or 1 outside the domain as R⁻¹×R = 1,
94+
// converts a Montgomery value out of the domain.
95+
one := &p256OrdElement{1}
96+
p256OrdMul(x, x, one)
97+
98+
var xOut [32]byte
99+
p256OrdLittleToBig(&xOut, x)
100+
return xOut[:], nil
101+
}

0 commit comments

Comments
 (0)