Skip to content

Commit 427e05b

Browse files
authored
Merge pull request #527 from weslleyspereira/try-xROTG-from-Ed-Anderson
Try: safe scaling ROTG
2 parents 77a0ceb + 5a929d8 commit 427e05b

File tree

11 files changed

+768
-408
lines changed

11 files changed

+768
-408
lines changed

BLAS/SRC/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,16 @@
2929
# Level 1 BLAS
3030
#---------------------------------------------------------
3131
set(SBLAS1 isamax.f sasum.f saxpy.f scopy.f sdot.f snrm2.f
32-
srot.f srotg.f sscal.f sswap.f sdsdot.f srotmg.f srotm.f)
32+
srot.f srotg.f90 sscal.f sswap.f sdsdot.f srotmg.f srotm.f)
3333

3434
set(CBLAS1 scabs1.f scasum.f scnrm2.f icamax.f caxpy.f ccopy.f
35-
cdotc.f cdotu.f csscal.f crotg.f cscal.f cswap.f csrot.f)
35+
cdotc.f cdotu.f csscal.f crotg.f90 cscal.f cswap.f csrot.f)
3636

3737
set(DBLAS1 idamax.f dasum.f daxpy.f dcopy.f ddot.f dnrm2.f
38-
drot.f drotg.f dscal.f dsdot.f dswap.f drotmg.f drotm.f)
38+
drot.f drotg.f90 dscal.f dsdot.f dswap.f drotmg.f drotm.f)
3939

4040
set(ZBLAS1 dcabs1.f dzasum.f dznrm2.f izamax.f zaxpy.f zcopy.f
41-
zdotc.f zdotu.f zdscal.f zrotg.f zscal.f zswap.f zdrot.f)
41+
zdotc.f zdotu.f zdscal.f zrotg.f90 zscal.f zswap.f zdrot.f)
4242

4343
set(CB1AUX isamax.f sasum.f saxpy.f scopy.f snrm2.f sscal.f)
4444

BLAS/SRC/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@
5656
TOPSRCDIR = ../..
5757
include $(TOPSRCDIR)/make.inc
5858

59+
.SUFFIXES: .f90 .o
60+
.f90.o:
61+
$(FC) $(FFLAGS) -c -o $@ $<
62+
5963
.PHONY: all
6064
all: $(BLASLIB)
6165

BLAS/SRC/crotg.f

Lines changed: 0 additions & 94 deletions
This file was deleted.

BLAS/SRC/crotg.f90

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
!> \brief \b CROTG
2+
!
3+
! =========== DOCUMENTATION ===========
4+
!
5+
! Online html documentation available at
6+
! http://www.netlib.org/lapack/explore-html/
7+
!
8+
! Definition:
9+
! ===========
10+
!
11+
! CROTG constructs a plane rotation
12+
! [ c s ] [ a ] = [ r ]
13+
! [ -conjg(s) c ] [ b ] [ 0 ]
14+
! where c is real, s ic complex, and c**2 + conjg(s)*s = 1.
15+
!
16+
!> \par Purpose:
17+
! =============
18+
!>
19+
!> \verbatim
20+
!>
21+
!> The computation uses the formulas
22+
!> |x| = sqrt( Re(x)**2 + Im(x)**2 )
23+
!> sgn(x) = x / |x| if x /= 0
24+
!> = 1 if x = 0
25+
!> c = |a| / sqrt(|a|**2 + |b|**2)
26+
!> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
27+
!> When a and b are real and r /= 0, the formulas simplify to
28+
!> r = sgn(a)*sqrt(|a|**2 + |b|**2)
29+
!> c = a / r
30+
!> s = b / r
31+
!> the same as in CROTG when |a| > |b|. When |b| >= |a|, the
32+
!> sign of c and s will be different from those computed by CROTG
33+
!> if the signs of a and b are not the same.
34+
!>
35+
!> \endverbatim
36+
!
37+
! Arguments:
38+
! ==========
39+
!
40+
!> \param[in,out] A
41+
!> \verbatim
42+
!> A is COMPLEX
43+
!> On entry, the scalar a.
44+
!> On exit, the scalar r.
45+
!> \endverbatim
46+
!>
47+
!> \param[in] B
48+
!> \verbatim
49+
!> B is COMPLEX
50+
!> The scalar b.
51+
!> \endverbatim
52+
!>
53+
!> \param[out] C
54+
!> \verbatim
55+
!> C is REAL
56+
!> The scalar c.
57+
!> \endverbatim
58+
!>
59+
!> \param[out] S
60+
!> \verbatim
61+
!> S is REAL
62+
!> The scalar s.
63+
!> \endverbatim
64+
!
65+
! Authors:
66+
! ========
67+
!
68+
!> \author Edward Anderson, Lockheed Martin
69+
!
70+
!> \par Contributors:
71+
! ==================
72+
!>
73+
!> Weslley Pereira, University of Colorado Denver, USA
74+
!
75+
!> \ingroup single_blas_level1
76+
!
77+
!> \par Further Details:
78+
! =====================
79+
!>
80+
!> \verbatim
81+
!>
82+
!> Anderson E. (2017)
83+
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
84+
!> ACM Trans Math Softw 44:1--28
85+
!> https://doi.org/10.1145/3061665
86+
!>
87+
!> \endverbatim
88+
!
89+
! =====================================================================
90+
subroutine CROTG( a, b, c, s )
91+
integer, parameter :: wp = kind(1.e0)
92+
!
93+
! -- Reference BLAS level1 routine --
94+
! -- Reference BLAS is a software package provided by Univ. of Tennessee, --
95+
! -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
96+
!
97+
! .. Constants ..
98+
real(wp), parameter :: zero = 0.0_wp
99+
real(wp), parameter :: one = 1.0_wp
100+
complex(wp), parameter :: czero = 0.0_wp
101+
! ..
102+
! .. Scaling constants ..
103+
real(wp), parameter :: safmin = real(radix(0._wp),wp)**max( &
104+
minexponent(0._wp)-1, &
105+
1-maxexponent(0._wp) &
106+
)
107+
real(wp), parameter :: safmax = real(radix(0._wp),wp)**max( &
108+
1-minexponent(0._wp), &
109+
maxexponent(0._wp)-1 &
110+
)
111+
real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( &
112+
minexponent(0._wp)-1, &
113+
1-maxexponent(0._wp) &
114+
) / epsilon(0._wp) )
115+
real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( &
116+
1-minexponent(0._wp), &
117+
maxexponent(0._wp)-1 &
118+
) * epsilon(0._wp) )
119+
! ..
120+
! .. Scalar Arguments ..
121+
real(wp) :: c
122+
complex(wp) :: a, b, s
123+
! ..
124+
! .. Local Scalars ..
125+
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
126+
complex(wp) :: f, fs, g, gs, r, t
127+
! ..
128+
! .. Intrinsic Functions ..
129+
intrinsic :: abs, aimag, conjg, max, min, real, sqrt
130+
! ..
131+
! .. Statement Functions ..
132+
real(wp) :: ABSSQ
133+
! ..
134+
! .. Statement Function definitions ..
135+
ABSSQ( t ) = real( t )**2 + aimag( t )**2
136+
! ..
137+
! .. Executable Statements ..
138+
!
139+
f = a
140+
g = b
141+
if( g == czero ) then
142+
c = one
143+
s = czero
144+
r = f
145+
else if( f == czero ) then
146+
c = zero
147+
g1 = max( abs(real(g)), abs(aimag(g)) )
148+
if( g1 > rtmin .and. g1 < rtmax ) then
149+
!
150+
! Use unscaled algorithm
151+
!
152+
g2 = ABSSQ( g )
153+
d = sqrt( g2 )
154+
s = conjg( g ) / d
155+
r = d
156+
else
157+
!
158+
! Use scaled algorithm
159+
!
160+
u = min( safmax, max( safmin, g1 ) )
161+
uu = one / u
162+
gs = g*uu
163+
g2 = ABSSQ( gs )
164+
d = sqrt( g2 )
165+
s = conjg( gs ) / d
166+
r = d*u
167+
end if
168+
else
169+
f1 = max( abs(real(f)), abs(aimag(f)) )
170+
g1 = max( abs(real(g)), abs(aimag(g)) )
171+
if( f1 > rtmin .and. f1 < rtmax .and. &
172+
g1 > rtmin .and. g1 < rtmax ) then
173+
!
174+
! Use unscaled algorithm
175+
!
176+
f2 = ABSSQ( f )
177+
g2 = ABSSQ( g )
178+
h2 = f2 + g2
179+
if( f2 > rtmin .and. h2 < rtmax ) then
180+
d = sqrt( f2*h2 )
181+
else
182+
d = sqrt( f2 )*sqrt( h2 )
183+
end if
184+
p = 1 / d
185+
c = f2*p
186+
s = conjg( g )*( f*p )
187+
r = f*( h2*p )
188+
else
189+
!
190+
! Use scaled algorithm
191+
!
192+
u = min( safmax, max( safmin, f1, g1 ) )
193+
uu = one / u
194+
gs = g*uu
195+
g2 = ABSSQ( gs )
196+
if( f1*uu < rtmin ) then
197+
!
198+
! f is not well-scaled when scaled by g1.
199+
! Use a different scaling for f.
200+
!
201+
v = min( safmax, max( safmin, f1 ) )
202+
vv = one / v
203+
w = v * uu
204+
fs = f*vv
205+
f2 = ABSSQ( fs )
206+
h2 = f2*w**2 + g2
207+
else
208+
!
209+
! Otherwise use the same scaling for f and g.
210+
!
211+
w = one
212+
fs = f*uu
213+
f2 = ABSSQ( fs )
214+
h2 = f2 + g2
215+
end if
216+
if( f2 > rtmin .and. h2 < rtmax ) then
217+
d = sqrt( f2*h2 )
218+
else
219+
d = sqrt( f2 )*sqrt( h2 )
220+
end if
221+
p = 1 / d
222+
c = ( f2*p )*w
223+
s = conjg( gs )*( fs*p )
224+
r = ( fs*( h2*p ) )*u
225+
end if
226+
end if
227+
a = r
228+
return
229+
end subroutine

0 commit comments

Comments
 (0)