Skip to content

Commit f9538a1

Browse files
omuspablosanjose
authored and committed
WIP: Upgrade to OpenBLAS 0.3.13 (#39216)
* Use OpenBLAS 0.3.13 Bumping to support xcode/clang 12 which was addressed in OpenBLAS 0.3.11 * Use OpenBLAS 0.3.13+1 * Add openblas-exshift patch for src build * Update LinearAlgebra doctests for Linux * non-ambiguous ordering in eigen and eigvals test (#39767) add missing sortby's Co-authored-by: Pablo San-Jose <[email protected]> (cherry picked from commit 3129a5b)
1 parent 23267f0 commit f9538a1

File tree

11 files changed

+302
-149
lines changed

11 files changed

+302
-149
lines changed

deps/Versions.make

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ OBJCONV_JLL_NAME := Objconv
7171
OBJCONV_JLL_VER := 2.49.1+0
7272

7373
# OpenBLAS
74-
OPENBLAS_VER := 0.3.10
74+
OPENBLAS_VER := 0.3.13
7575
OPENBLAS_JLL_NAME := OpenBLAS
7676

7777
# OpenLibm

deps/blas.mk

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,12 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/
103103
patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
104104
echo 1 > $@
105105

106-
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
106+
# Apply patches/openblas-exshift.patch inside $(BUILDDIR)/$(OPENBLAS_SRC_DIR).
# Ordered after the ofast-power patch via that patch's "-applied" stamp file.
# The final `echo 1 > $@` writes this rule's own stamp; make only reaches it
# when the preceding `cd ... && patch ...` line exits successfully.
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-exshift.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
107+
cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
108+
patch -p1 -f < $(SRCDIR)/patches/openblas-exshift.patch
109+
echo 1 > $@
110+
111+
# Stamp-only rule: the recipe does nothing but write the build-configured stamp.
# Its sole prerequisite is the exshift patch stamp, so the stamp is written only
# after that patch step has completed.
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-exshift.patch-applied
107112
echo 1 > $@
108113

109114
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured

deps/checksums/openblas

Lines changed: 94 additions & 94 deletions
Large diffs are not rendered by default.

deps/openblas.version

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
OPENBLAS_BRANCH=v0.3.10
2-
OPENBLAS_SHA1=63b03efc2af332c88b86d4fd8079d00f4b439adf
1+
OPENBLAS_BRANCH=v0.3.13
2+
OPENBLAS_SHA1=d2b11c47774b9216660e76e2fc67e87079f26fa1

deps/patches/openblas-exshift.patch

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
commit c4b5abbe43d7c22215ef36ef4f7c1413c975678c
2+
Author: Martin Kroeker <[email protected]>
3+
Date: Fri Jan 29 10:45:36 2021 +0100
4+
5+
fix data type
6+
7+
commit f87842483eee9d158f44d51d4c09662c3cff7526
8+
Author: Martin Kroeker <[email protected]>
9+
Date: Fri Jan 29 09:56:12 2021 +0100
10+
11+
fix calculation of non-exceptional shift (from Reference-LAPACK PR 477)
12+
13+
commit 856bc365338f7559639f341d76ca8746d1628ee5
14+
Author: Martin Kroeker <[email protected]>
15+
Date: Wed Jan 27 13:41:45 2021 +0100
16+
17+
Add exceptional shift to fix rare convergence problems
18+
19+
---
20+
diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f
21+
index 73d35621..4725e716 100644
22+
--- a/lapack-netlib/SRC/chgeqz.f
23+
+++ b/lapack-netlib/SRC/chgeqz.f
24+
@@ -320,12 +320,13 @@
25+
$ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
26+
COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
27+
$ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
28+
- $ U12, X
29+
+ $ U12, X, ABI12, Y
30+
* ..
31+
* .. External Functions ..
32+
+ COMPLEX CLADIV
33+
LOGICAL LSAME
34+
REAL CLANHS, SLAMCH
35+
- EXTERNAL LSAME, CLANHS, SLAMCH
36+
+ EXTERNAL CLADIV, LSAME, CLANHS, SLAMCH
37+
* ..
38+
* .. External Subroutines ..
39+
EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA
40+
@@ -729,22 +730,34 @@
41+
AD22 = ( ASCALE*H( ILAST, ILAST ) ) /
42+
$ ( BSCALE*T( ILAST, ILAST ) )
43+
ABI22 = AD22 - U12*AD21
44+
+ ABI12 = AD12 - U12*AD11
45+
*
46+
- T1 = HALF*( AD11+ABI22 )
47+
- RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 )
48+
- TEMP = REAL( T1-ABI22 )*REAL( RTDISC ) +
49+
- $ AIMAG( T1-ABI22 )*AIMAG( RTDISC )
50+
- IF( TEMP.LE.ZERO ) THEN
51+
- SHIFT = T1 + RTDISC
52+
- ELSE
53+
- SHIFT = T1 - RTDISC
54+
+ SHIFT = ABI22
55+
+ CTEMP = SQRT( ABI12 )*SQRT( AD21 )
56+
+ TEMP = ABS1( CTEMP )
57+
+ IF( CTEMP.NE.ZERO ) THEN
58+
+ X = HALF*( AD11-SHIFT )
59+
+ TEMP2 = ABS1( X )
60+
+ TEMP = MAX( TEMP, ABS1( X ) )
61+
+ Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 )
62+
+ IF( TEMP2.GT.ZERO ) THEN
63+
+ IF( REAL( X / TEMP2 )*REAL( Y )+
64+
+ $ AIMAG( X / TEMP2 )*AIMAG( Y ).LT.ZERO )Y = -Y
65+
+ END IF
66+
+ SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) )
67+
END IF
68+
ELSE
69+
*
70+
* Exceptional shift. Chosen for no particularly good reason.
71+
*
72+
- ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/
73+
- $ (BSCALE*T(ILAST-1,ILAST-1))
74+
+ IF( ( IITER / 20 )*20.EQ.IITER .AND.
75+
+ $ BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN
76+
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
77+
+ $ ILAST ) )/( BSCALE*T( ILAST, ILAST ) )
78+
+ ELSE
79+
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
80+
+ $ ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) )
81+
+ END IF
82+
SHIFT = ESHIFT
83+
END IF
84+
*
85+
diff --git a/lapack-netlib/SRC/zhgeqz.f b/lapack-netlib/SRC/zhgeqz.f
86+
index b51cba4f..b28ae47a 100644
87+
--- a/lapack-netlib/SRC/zhgeqz.f
88+
+++ b/lapack-netlib/SRC/zhgeqz.f
89+
@@ -320,12 +320,13 @@
90+
$ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
91+
COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
92+
$ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
93+
- $ U12, X
94+
+ $ U12, X, ABI12, Y
95+
* ..
96+
* .. External Functions ..
97+
+ COMPLEX*16 ZLADIV
98+
LOGICAL LSAME
99+
DOUBLE PRECISION DLAMCH, ZLANHS
100+
- EXTERNAL LSAME, DLAMCH, ZLANHS
101+
+ EXTERNAL ZLADIV, LSAME, DLAMCH, ZLANHS
102+
* ..
103+
* .. External Subroutines ..
104+
EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL
105+
@@ -730,22 +731,34 @@
106+
AD22 = ( ASCALE*H( ILAST, ILAST ) ) /
107+
$ ( BSCALE*T( ILAST, ILAST ) )
108+
ABI22 = AD22 - U12*AD21
109+
+ ABI12 = AD12 - U12*AD11
110+
*
111+
- T1 = HALF*( AD11+ABI22 )
112+
- RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 )
113+
- TEMP = DBLE( T1-ABI22 )*DBLE( RTDISC ) +
114+
- $ DIMAG( T1-ABI22 )*DIMAG( RTDISC )
115+
- IF( TEMP.LE.ZERO ) THEN
116+
- SHIFT = T1 + RTDISC
117+
- ELSE
118+
- SHIFT = T1 - RTDISC
119+
+ SHIFT = ABI22
120+
+ CTEMP = SQRT( ABI12 )*SQRT( AD21 )
121+
+ TEMP = ABS1( CTEMP )
122+
+ IF( CTEMP.NE.ZERO ) THEN
123+
+ X = HALF*( AD11-SHIFT )
124+
+ TEMP2 = ABS1( X )
125+
+ TEMP = MAX( TEMP, ABS1( X ) )
126+
+ Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 )
127+
+ IF( TEMP2.GT.ZERO ) THEN
128+
+ IF( DBLE( X / TEMP2 )*DBLE( Y )+
129+
+ $ DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y
130+
+ END IF
131+
+ SHIFT = SHIFT - CTEMP*ZLADIV( CTEMP, ( X+Y ) )
132+
END IF
133+
ELSE
134+
*
135+
* Exceptional shift. Chosen for no particularly good reason.
136+
*
137+
- ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/
138+
- $ (BSCALE*T(ILAST-1,ILAST-1))
139+
+ IF( ( IITER / 20 )*20.EQ.IITER .AND.
140+
+ $ BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN
141+
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
142+
+ $ ILAST ) )/( BSCALE*T( ILAST, ILAST ) )
143+
+ ELSE
144+
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
145+
+ $ ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) )
146+
+ END IF
147+
SHIFT = ESHIFT
148+
END IF
149+
*
deps/patches/openblas-ofast-power.patch

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,32 @@
1-
Makefile.power | 8 ++++----
2-
1 file changed, 4 insertions(+), 4 deletions(-)
1+
Makefile.power | 6 +++---
2+
1 file changed, 3 insertions(+), 3 deletions(-)
33

44
diff --git a/Makefile.power b/Makefile.power
5-
index 24d8aa8a..e53a243a 100644
5+
index c7e97229..8426e816 100644
66
--- a/Makefile.power
77
+++ b/Makefile.power
8-
@@ -11,20 +11,20 @@ endif
9-
10-
ifeq ($(CORE), POWER9)
11-
ifeq ($(USE_OPENMP), 1)
12-
-COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
13-
+COMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
14-
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
15-
else
16-
-COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
17-
+COMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
18-
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
8+
@@ -10,13 +10,13 @@ USE_OPENMP = 1
199
endif
10+
11+
ifeq ($(CORE), POWER10)
12+
-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
13+
+CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
14+
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
2015
endif
2116

17+
ifeq ($(CORE), POWER9)
18+
ifneq ($(C_COMPILER), PGI)
19+
-CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
20+
+CCOMMON_OPT += -mvsx -fno-fast-math
21+
ifeq ($(C_COMPILER), GCC)
22+
ifneq ($(GCCVERSIONGT4), 1)
23+
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
24+
@@ -49,7 +49,7 @@ endif
25+
2226
ifeq ($(CORE), POWER8)
23-
ifeq ($(USE_OPENMP), 1)
24-
-COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
25-
+COMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
26-
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
27+
ifneq ($(C_COMPILER), PGI)
28+
-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
29+
+CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
2730
else
28-
-COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
29-
+COMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
30-
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
31+
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
3132
endif
32-
endif
33-

deps/patches/openblas-winexit.patch

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@ From: Keno Fischer <[email protected]>
33
Date: Sat, 14 Mar 2020 12:05:19 +0100
44

55
---
6-
driver/others/memory.c | 131 +----------------------------------------
6+
driver/others/memory.c | 131 +------------------------------------------------
77
1 file changed, 2 insertions(+), 129 deletions(-)
88

99
diff --git a/driver/others/memory.c b/driver/others/memory.c
10-
index 62a5a021..23f8fe65 100644
10+
index ba2bb55b..bf6b5529 100644
1111
--- a/driver/others/memory.c
1212
+++ b/driver/others/memory.c
13-
@@ -1510,7 +1510,7 @@ void CONSTRUCTOR gotoblas_init(void) {
13+
@@ -1534,7 +1534,7 @@ void CONSTRUCTOR gotoblas_init(void) {
1414

1515
}
1616

@@ -19,7 +19,7 @@ index 62a5a021..23f8fe65 100644
1919

2020
if (gotoblas_initialized == 0) return;
2121

22-
@@ -1547,74 +1547,12 @@ void DESTRUCTOR gotoblas_quit(void) {
22+
@@ -1571,74 +1571,12 @@ void DESTRUCTOR gotoblas_quit(void) {
2323
#endif
2424
}
2525

@@ -57,8 +57,8 @@ index 62a5a021..23f8fe65 100644
5757
-*/
5858
-static int on_process_term(void)
5959
-{
60-
- gotoblas_quit();
61-
- return 0;
60+
- gotoblas_quit();
61+
- return 0;
6262
-}
6363
#ifdef _WIN64
6464
#pragma comment(linker, "/INCLUDE:_tls_used")
@@ -94,7 +94,7 @@ index 62a5a021..23f8fe65 100644
9494
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
9595
/* Don't call me; this is just work around for PGI / Sun bug */
9696
void gotoblas_dummy_for_PGI(void) {
97-
@@ -3104,7 +3042,7 @@ void CONSTRUCTOR gotoblas_init(void) {
97+
@@ -3136,7 +3074,7 @@ void CONSTRUCTOR gotoblas_init(void) {
9898

9999
}
100100

@@ -103,7 +103,7 @@ index 62a5a021..23f8fe65 100644
103103

104104
if (gotoblas_initialized == 0) return;
105105

106-
@@ -3133,71 +3071,6 @@ void DESTRUCTOR gotoblas_quit(void) {
106+
@@ -3165,71 +3103,6 @@ void DESTRUCTOR gotoblas_quit(void) {
107107
#endif
108108
}
109109

@@ -138,8 +138,8 @@ index 62a5a021..23f8fe65 100644
138138
-*/
139139
-static int on_process_term(void)
140140
-{
141-
- gotoblas_quit();
142-
- return 0;
141+
- gotoblas_quit();
142+
- return 0;
143143
-}
144144
-#ifdef _WIN64
145145
-#pragma comment(linker, "/INCLUDE:_tls_used")
@@ -175,4 +175,3 @@ index 62a5a021..23f8fe65 100644
175175
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
176176
/* Don't call me; this is just work around for PGI / Sun bug */
177177
void gotoblas_dummy_for_PGI(void) {
178-

stdlib/LinearAlgebra/src/LinearAlgebra.jl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -282,14 +282,14 @@ julia> ldiv!(Y, qr(A), X);
282282
julia> Y
283283
3-element Vector{Float64}:
284284
0.7128099173553719
285-
-0.051652892561983674
286-
0.10020661157024757
285+
-0.051652892561983806
286+
0.10020661157024781
287287
288288
julia> A\\X
289289
3-element Vector{Float64}:
290290
0.7128099173553719
291-
-0.05165289256198333
292-
0.10020661157024785
291+
-0.05165289256198342
292+
0.1002066115702479
293293
```
294294
"""
295295
ldiv!(Y, A, B)
@@ -319,14 +319,14 @@ julia> ldiv!(qr(A), X);
319319
julia> X
320320
3-element Vector{Float64}:
321321
0.7128099173553719
322-
-0.051652892561983674
323-
0.10020661157024757
322+
-0.051652892561983806
323+
0.10020661157024781
324324
325325
julia> A\\Y
326326
3-element Vector{Float64}:
327327
0.7128099173553719
328-
-0.05165289256198333
329-
0.10020661157024785
328+
-0.05165289256198342
329+
0.1002066115702479
330330
```
331331
"""
332332
ldiv!(A, B)

stdlib/LinearAlgebra/src/hessenberg.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,9 +402,9 @@ Q factor:
402402
0.0 -0.707107 0.707107
403403
H factor:
404404
3×3 UpperHessenberg{Float64, Matrix{Float64}}:
405-
4.0 -11.3137 -1.41421
406-
-5.65685 5.0 2.0
407-
⋅ -8.88178e-16 1.0
405+
4.0 -11.3137 -1.41421
406+
-5.65685 5.0 2.0
407+
⋅ -1.0444e-15 1.0
408408
409409
julia> F.Q * F.H * F.Q'
410410
3×3 Matrix{Float64}:

stdlib/LinearAlgebra/test/eigen.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,15 @@ aimg = randn(n,n)/2
8282
a1_nsg = view(a, 1:n1, 1:n1)
8383
a2_nsg = view(a, n1+1:n2, n1+1:n2)
8484
end
85-
f = eigen(a1_nsg, a2_nsg)
85+
sortfunc = x -> real(x) + imag(x)
86+
f = eigen(a1_nsg, a2_nsg; sortby = sortfunc)
8687
@test a1_nsg*f.vectors ≈ (a2_nsg*f.vectors) * Diagonal(f.values)
87-
@test f.values ≈ eigvals(a1_nsg, a2_nsg)
88-
@test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg)) atol=50000ε
89-
@test eigvecs(a1_nsg, a2_nsg) == f.vectors
88+
@test f.values ≈ eigvals(a1_nsg, a2_nsg; sortby = sortfunc)
89+
@test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg, sortby = sortfunc)) atol=50000ε
90+
@test eigvecs(a1_nsg, a2_nsg; sortby = sortfunc) == f.vectors
9091
@test_throws ErrorException f.Z
9192

92-
d,v = eigen(a1_nsg, a2_nsg)
93+
d,v = eigen(a1_nsg, a2_nsg; sortby = sortfunc)
9394
@test d == f.values
9495
@test v == f.vectors
9596
end

stdlib/OpenBLAS_jll/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "OpenBLAS_jll"
22
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
3-
version = "0.3.10+3"
3+
version = "0.3.13+1"
44

55
[deps]
66
CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"

0 commit comments

Comments
 (0)