Skip to content

Commit ee84ee4

Browse files
arsenm authored and ronlieb committed
device-libs: Optimize odd/even integer checks in pow*
There are apparently some missing optimizations surrounding comparisons against the previous pseudo-enum. The comparison of a conditional add of booleans against the constant 1 did not fold away. We would need to implement an optimization such as `icmp eq (add (zext i1 x), (zext i1 y)), 1 => xor x, y`, which I filed here: llvm#64859. Just do this manually, since it's more legible anyway. Saves 5 instructions for the f32 case.

Change-Id: Iee7befb093561cf66b72a9df6b37d0cacb2154ee
1 parent 47ec0c9 commit ee84ee4

File tree

3 files changed

+46
-52
lines changed

3 files changed

+46
-52
lines changed

amd/device-libs/ocml/src/powD_base.h

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,18 @@ samesign(double x, double y)
2121
return ((xh ^ yh) & 0x80000000U) == 0;
2222
}
2323

24-
// Check if a double is an integral value, and whether it's even or
25-
// odd.
26-
//
27-
// status: 0=not integer, 1=odd, 2=even
28-
static int classify_integer(double ay)
24+
static bool is_integer(double ay)
2925
{
30-
int inty = BUILTIN_TRUNC_F64(ay) == ay;
31-
double half_ay = 0.5 * ay;
26+
return BUILTIN_TRUNC_F64(ay) == ay;
27+
}
28+
29+
static bool is_even_integer(double ay) {
30+
// Even integers are still integers after division by 2.
31+
return is_integer(0.5 * ay);
32+
}
3233

33-
// Even integers are still even after division by 2.
34-
inty += inty & (BUILTIN_TRUNC_F64(half_ay) == half_ay);
35-
return inty;
34+
static bool is_odd_integer(double ay) {
35+
return is_integer(ay) && !is_even_integer(ay);
3636
}
3737

3838
#if defined(COMPILING_POW)
@@ -44,19 +44,20 @@ MATH_MANGLE(pow)(double x, double y)
4444
double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
4545

4646
double ay = BUILTIN_ABS_F64(y);
47-
int inty = classify_integer(ay);
48-
double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0);
47+
bool is_odd_y = is_odd_integer(ay);
48+
49+
double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_y & (x < 0.0)) ? -0.0 : 0.0);
4950

5051
// Now all the edge cases
51-
if (x < 0.0 && !inty)
52+
if (x < 0.0 && !is_integer(ay))
5253
ret = QNAN_F64;
5354

5455
if (BUILTIN_ISINF_F64(ay))
5556
ret = ax == 1.0 ? ax : (samesign(y, ax - 1.0) ? ay : 0.0);
5657

5758
if (BUILTIN_ISINF_F64(ax) || x == 0.0)
5859
ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : PINF_F64,
59-
inty == 1 ? x : 0.0);
60+
is_odd_y ? x : 0.0);
6061

6162
if (BUILTIN_ISUNORDERED_F64(x, y))
6263
ret = QNAN_F64;
@@ -77,9 +78,7 @@ MATH_MANGLE(powr)(double x, double y)
7778
double expylnx = MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
7879

7980
double ay = BUILTIN_ABS_F64(y);
80-
int inty = classify_integer(ay);
81-
82-
double ret = BUILTIN_COPYSIGN_F64(expylnx, ((inty == 1) & (x < 0.0)) ? -0.0 : 0.0);
81+
double ret = BUILTIN_COPYSIGN_F64(expylnx, (is_odd_integer(ay) & (x < 0.0)) ? -0.0 : 0.0);
8382

8483
// Now all the edge cases
8584
double iz = y < 0.0 ? PINF_F64 : 0.0;

amd/device-libs/ocml/src/powF_base.h

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,18 @@ static float compute_expylnx_float(float ax, float y)
5555
return MATH_PRIVATE(expep)(omul(y, MATH_PRIVATE(epln)(ax)));
5656
}
5757

58-
// Check if a float is an integral value, and whether it's even or
59-
// odd.
60-
//
61-
// status: 0=not integer, 1=odd, 2=even
62-
static int classify_integer(float ay)
58+
static bool is_integer(float ay)
6359
{
64-
int inty = BUILTIN_TRUNC_F32(ay) == ay;
65-
float half_ay = 0.5f * ay;
60+
return BUILTIN_TRUNC_F32(ay) == ay;
61+
}
62+
63+
static bool is_even_integer(float ay) {
64+
// Even integers are still integers after division by 2.
65+
return is_integer(0.5f * ay);
66+
}
6667

67-
// Even integers are still even after division by 2.
68-
inty += inty & (BUILTIN_TRUNC_F32(half_ay) == half_ay);
69-
return inty;
68+
static bool is_odd_integer(float ay) {
69+
return is_integer(ay) && !is_even_integer(ay);
7070
}
7171

7272
#if defined(COMPILING_POW)
@@ -78,20 +78,20 @@ MATH_MANGLE(pow)(float x, float y)
7878
float expylnx = compute_expylnx_float(ax, y);
7979

8080
float ay = BUILTIN_ABS_F32(y);
81-
int inty = classify_integer(ay);
81+
bool is_odd_y = is_odd_integer(ay);
8282

83-
float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f);
83+
float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_y & (x < 0.0f)) ? -0.0f : 0.0f);
8484

8585
// Now all the edge cases
86-
if (x < 0.0f && !inty)
86+
if (x < 0.0f && !is_integer(ay))
8787
ret = QNAN_F32;
8888

8989
if (BUILTIN_ISINF_F32(ay))
9090
ret = ax == 1.0f ? ax : (samesign(y, ax - 1.0f) ? ay : 0.0f);
9191

9292
if (BUILTIN_ISINF_F32(ax) || x == 0.0f)
9393
ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : PINF_F32,
94-
inty == 1 ? x : 0.0f);
94+
is_odd_y ? x : 0.0f);
9595

9696
if (BUILTIN_ISUNORDERED_F32(x, y))
9797
ret = QNAN_F32;
@@ -111,9 +111,7 @@ MATH_MANGLE(powr)(float x, float y)
111111
float expylnx = compute_expylnx_float(ax, y);
112112

113113
float ay = BUILTIN_ABS_F32(y);
114-
int inty = classify_integer(ay);
115-
116-
float ret = BUILTIN_COPYSIGN_F32(expylnx, ((inty == 1) & (x < 0.0f)) ? -0.0f : 0.0f);
114+
float ret = BUILTIN_COPYSIGN_F32(expylnx, (is_odd_integer(ay) & (x < 0.0f)) ? -0.0f : 0.0f);
117115

118116
// Now all the edge cases
119117
float iz = y < 0.0f ? PINF_F32 : 0.0f;

amd/device-libs/ocml/src/powH_base.h

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,18 @@ static float compute_expylnx_f16(half ax, half y)
1616
return BUILTIN_AMDGPU_EXP2_F32((float)y * BUILTIN_AMDGPU_LOG2_F32((float)ax));
1717
}
1818

19-
// Check if a half is an integral value, and whether it's even or
20-
// odd.
21-
//
22-
// status: 0=not integer, 1=odd, 2=even
23-
static int classify_integer(half ay)
19+
static bool is_integer(half ay)
2420
{
25-
bool inty = BUILTIN_TRUNC_F16(ay) == ay;
26-
half half_ay = 0.5h * ay;
21+
return BUILTIN_TRUNC_F16(ay) == ay;
22+
}
23+
24+
static bool is_even_integer(half ay) {
25+
// Even integers are still integers after division by 2.
26+
return is_integer(0.5h * ay);
27+
}
2728

28-
// Even integers are still even after division by 2.
29-
inty += inty & (BUILTIN_TRUNC_F16(half_ay) == half_ay);
30-
return inty;
29+
static bool is_odd_integer(half ay) {
30+
return is_integer(ay) && !is_even_integer(ay);
3131
}
3232

3333
#if defined(COMPILING_POW)
@@ -39,20 +39,19 @@ MATH_MANGLE(pow)(half x, half y)
3939
float p = compute_expylnx_f16(ax, y);
4040

4141
half ay = BUILTIN_ABS_F16(y);
42-
int inty = classify_integer(ay);
43-
44-
half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f);
42+
bool is_odd_y = is_odd_integer(ay);
43+
half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_y & (x < 0.0h)) ? -0.0f : 0.0f);
4544

4645
// Now all the edge cases
47-
if (x < 0.0h && !inty)
46+
if (x < 0.0h && !is_integer(ay))
4847
ret = QNAN_F16;
4948

5049
if (BUILTIN_ISINF_F16(ay))
5150
ret = ax == 1.0h ? ax : (samesign(y, ax - 1.0h) ? ay : 0.0h);
5251

5352
if (BUILTIN_ISINF_F16(ax) || x == 0.0h)
5453
ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (y < 0.0h) ? 0.0h : PINF_F16,
55-
inty == 1 ? x : 0.0h);
54+
is_odd_y ? x : 0.0h);
5655

5756
if (BUILTIN_ISUNORDERED_F16(x, y))
5857
ret = QNAN_F16;
@@ -72,9 +71,7 @@ MATH_MANGLE(powr)(half x, half y)
7271
float p = compute_expylnx_f16(ax, y);
7372

7473
half ay = BUILTIN_ABS_F16(y);
75-
int inty = classify_integer(ay);
76-
77-
half ret = BUILTIN_COPYSIGN_F16((half)p, ((inty == 1) & (x < 0.0h)) ? -0.0f : 0.0f);
74+
half ret = BUILTIN_COPYSIGN_F16((half)p, (is_odd_integer(ay) & (x < 0.0h)) ? -0.0f : 0.0f);
7875

7976
// Now all the edge cases
8077
half iz = y < 0.0h ? PINF_F16 : 0.0h;

0 commit comments

Comments
 (0)