Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit 83e22b5

Browse files
[Impeller] migrate Gaussian shaders to half precision. (#40752)
[Impeller] migrate Gaussian shaders to half precision.
1 parent 1e73744 commit 83e22b5

File tree

9 files changed

+202
-186
lines changed

9 files changed

+202
-186
lines changed

impeller/compiler/shader_lib/impeller/gaussian.glsl

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,51 +6,53 @@
66
#define GAUSSIAN_GLSL_
77

88
#include <impeller/constants.glsl>
9+
#include <impeller/types.glsl>
910

1011
/// Gaussian distribution function.
11-
float IPGaussian(float x, float sigma) {
12-
float variance = sigma * sigma;
13-
return exp(-0.5 * x * x / variance) / (kSqrtTwoPi * sigma);
12+
float16_t IPGaussian(float16_t x, float16_t sigma) {
13+
float16_t variance = sigma * sigma;
14+
return exp(-0.5hf * x * x / variance) / (float16_t(kSqrtTwoPi) * sigma);
1415
}
1516

1617
/// Abramowitz and Stegun erf approximation.
17-
float IPErf(float x) {
18-
float a = abs(x);
18+
float16_t IPErf(float16_t x) {
19+
float16_t a = abs(x);
1920
// b = 0.278393*a + 0.230389*a^2 + 0.078108*a^4 + 1, where a = |x|
20-
float b = (0.278393 + (0.230389 + 0.078108 * a * a) * a) * a + 1.0;
21-
return sign(x) * (1 - 1 / (b * b * b * b));
21+
float16_t b =
22+
(0.278393hf + (0.230389hf + 0.078108hf * a * a) * a) * a + 1.0hf;
23+
return sign(x) * (1.0hf - 1.0hf / (b * b * b * b));
2224
}
2325

2426
/// Vec2 variation for the Abramowitz and Stegun erf approximation.
25-
vec2 IPVec2Erf(vec2 x) {
26-
vec2 a = abs(x);
27+
f16vec2 IPVec2Erf(f16vec2 x) {
28+
f16vec2 a = abs(x);
2729
// b = 0.278393*a + 0.230389*a^2 + 0.078108*a^4 + 1, where a = |x|
28-
vec2 b = (0.278393 + (0.230389 + 0.078108 * a * a) * a) * a + 1.0;
29-
return sign(x) * (1 - 1 / (b * b * b * b));
30+
f16vec2 b = (0.278393hf + (0.230389hf + 0.078108hf * a * a) * a) * a + 1.0hf;
31+
return sign(x) * (1.0hf - 1.0hf / (b * b * b * b));
3032
}
3133

3234
/// The indefinite integral of the Gaussian function.
3335
/// Uses a very close approximation of Erf.
34-
float IPGaussianIntegral(float x, float sigma) {
36+
float16_t IPGaussianIntegral(float16_t x, float16_t sigma) {
3537
// (1 + erf(x * (sqrt(2) / (2 * sigma)))) / 2
36-
return (1 + IPErf(x * (kHalfSqrtTwo / sigma))) * 0.5;
38+
return (1.0hf + IPErf(x * (float16_t(kHalfSqrtTwo) / sigma))) * 0.5hf;
3739
}
3840

3941
/// Vec2 variation for the indefinite integral of the Gaussian function.
4042
/// Uses a very close approximation of Erf.
41-
vec2 IPVec2GaussianIntegral(vec2 x, float sigma) {
43+
f16vec2 IPVec2GaussianIntegral(f16vec2 x, float16_t sigma) {
4244
// (1 + erf(x * (sqrt(2) / (2 * sigma)))) / 2
43-
return (1 + IPVec2Erf(x * (kHalfSqrtTwo / sigma))) * 0.5;
45+
return (1.0hf + IPVec2Erf(x * (float16_t(kHalfSqrtTwo) / sigma))) * 0.5hf;
4446
}
4547

4648
/// Simpler (but less accurate) approximation of the Gaussian integral.
47-
vec2 IPVec2FastGaussianIntegral(vec2 x, float sigma) {
48-
return 1 / (1 + exp(-kSqrtThree / sigma * x));
49+
f16vec2 IPVec2FastGaussianIntegral(f16vec2 x, float16_t sigma) {
50+
return 1.0hf / (1.0hf + exp(float16_t(-kSqrtThree) / sigma * x));
4951
}
5052

5153
/// Simple logistic sigmoid with a domain of [-1, 1] and range of [0, 1].
52-
float IPSigmoid(float x) {
53-
return 1.03731472073 / (1 + exp(-4 * x)) - 0.0186573603638;
54+
float16_t IPSigmoid(float16_t x) {
55+
return 1.03731472073hf / (1.0hf + exp(-4.0hf * x)) - 0.0186573603638hf;
5456
}
5557

5658
#endif

impeller/compiler/shader_lib/impeller/texture.glsl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,15 @@ vec4 IPSampleDecal(sampler2D texture_sampler, vec2 coords) {
143143
return texture(texture_sampler, coords);
144144
}
145145

146+
/// Sample a texture with decal tile mode.
147+
f16vec4 IPHalfSampleDecal(f16sampler2D texture_sampler, f16vec2 coords) {
148+
if (any(lessThan(coords, f16vec2(0.0hf))) ||
149+
any(greaterThanEqual(coords, f16vec2(1.0)))) {
150+
return f16vec4(0.0);
151+
}
152+
return texture(texture_sampler, coords);
153+
}
154+
146155
/// Sample a texture, emulating a specific tile mode.
147156
///
148157
/// This is useful for Impeller graphics backends that don't have native support

impeller/entity/shaders/border_mask_blur.frag

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,42 +15,42 @@
1515
// integral (using an erf approximation) to the 4 edges of the UV rectangle and
1616
// multiplying them.
1717

18-
uniform sampler2D texture_sampler;
18+
uniform f16sampler2D texture_sampler;
1919

2020
uniform FragInfo {
21-
float src_factor;
22-
float inner_blur_factor;
23-
float outer_blur_factor;
21+
float16_t src_factor;
22+
float16_t inner_blur_factor;
23+
float16_t outer_blur_factor;
2424

25-
vec2 sigma_uv;
25+
f16vec2 sigma_uv;
2626
}
2727
frag_info;
2828

29-
in vec2 v_texture_coords;
29+
in f16vec2 v_texture_coords;
3030

31-
out vec4 frag_color;
31+
out f16vec4 frag_color;
3232

33-
float BoxBlurMask(vec2 uv) {
33+
float16_t BoxBlurMask(f16vec2 uv) {
3434
// LTRB
35-
return IPGaussianIntegral(uv.x, frag_info.sigma_uv.x) * //
36-
IPGaussianIntegral(uv.y, frag_info.sigma_uv.y) * //
37-
IPGaussianIntegral(1 - uv.x, frag_info.sigma_uv.x) * //
38-
IPGaussianIntegral(1 - uv.y, frag_info.sigma_uv.y);
35+
return IPGaussianIntegral(uv.x, frag_info.sigma_uv.x) * //
36+
IPGaussianIntegral(uv.y, frag_info.sigma_uv.y) * //
37+
IPGaussianIntegral(1.0hf - uv.x, frag_info.sigma_uv.x) * //
38+
IPGaussianIntegral(1.0hf - uv.y, frag_info.sigma_uv.y);
3939
}
4040

4141
void main() {
42-
vec4 image_color = texture(texture_sampler, v_texture_coords);
43-
float blur_factor = BoxBlurMask(v_texture_coords);
42+
f16vec4 image_color = texture(texture_sampler, v_texture_coords);
43+
float16_t blur_factor = BoxBlurMask(v_texture_coords);
4444

45-
float within_bounds =
46-
float(v_texture_coords.x >= 0 && v_texture_coords.y >= 0 &&
47-
v_texture_coords.x < 1 && v_texture_coords.y < 1);
48-
float inner_factor =
45+
float16_t within_bounds =
46+
float16_t(v_texture_coords.x >= 0.0hf && v_texture_coords.y >= 0.0hf &&
47+
v_texture_coords.x < 1.0hf && v_texture_coords.y < 1.0hf);
48+
float16_t inner_factor =
4949
(frag_info.inner_blur_factor * blur_factor + frag_info.src_factor) *
5050
within_bounds;
51-
float outer_factor =
52-
frag_info.outer_blur_factor * blur_factor * (1 - within_bounds);
51+
float16_t outer_factor =
52+
frag_info.outer_blur_factor * blur_factor * (1.0hf - within_bounds);
5353

54-
float mask_factor = inner_factor + outer_factor;
54+
float16_t mask_factor = inner_factor + outer_factor;
5555
frag_color = image_color * mask_factor;
5656
}

impeller/entity/shaders/border_mask_blur.vert

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ frame_info;
1515
in vec2 vertices;
1616
in vec2 texture_coords;
1717

18-
out vec2 v_texture_coords;
18+
out f16vec2 v_texture_coords;
1919

2020
void main() {
2121
gl_Position = frame_info.mvp * vec4(vertices, 0.0, 1.0);
22-
v_texture_coords =
23-
IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale);
22+
v_texture_coords = f16vec2(
23+
IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale));
2424
}

impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,52 +18,52 @@
1818
#include <impeller/texture.glsl>
1919
#include <impeller/types.glsl>
2020

21-
uniform sampler2D texture_sampler;
21+
uniform f16sampler2D texture_sampler;
2222

2323
uniform BlurInfo {
24-
vec2 texture_size;
25-
vec2 blur_direction;
24+
f16vec2 texture_size;
25+
f16vec2 blur_direction;
2626

2727
// The blur sigma and radius have a linear relationship which is defined
2828
// host-side, but both are useful controls here. Sigma (pixels per standard
2929
// deviation) is used to define the gaussian function itself, whereas the
3030
// radius is used to limit how much of the function is integrated.
31-
float blur_sigma;
32-
float blur_radius;
31+
float16_t blur_sigma;
32+
float16_t blur_radius;
3333
}
3434
blur_info;
3535

3636
#if ENABLE_ALPHA_MASK
37-
uniform sampler2D alpha_mask_sampler;
37+
uniform f16sampler2D alpha_mask_sampler;
3838

3939
uniform MaskInfo {
40-
float src_factor;
41-
float inner_blur_factor;
42-
float outer_blur_factor;
40+
float16_t src_factor;
41+
float16_t inner_blur_factor;
42+
float16_t outer_blur_factor;
4343
}
4444
mask_info;
4545
#endif
4646

47-
vec4 Sample(sampler2D tex, vec2 coords) {
47+
f16vec4 Sample(f16sampler2D tex, f16vec2 coords) {
4848
#if ENABLE_DECAL_SPECIALIZATION
49-
return IPSampleDecal(tex, coords);
49+
return IPHalfSampleDecal(tex, coords);
5050
#else
5151
return texture(tex, coords);
5252
#endif
5353
}
5454

55-
in vec2 v_texture_coords;
56-
in vec2 v_src_texture_coords;
55+
in f16vec2 v_texture_coords;
56+
in f16vec2 v_src_texture_coords;
5757

58-
out vec4 frag_color;
58+
out f16vec4 frag_color;
5959

6060
void main() {
61-
vec4 total_color = vec4(0);
62-
float gaussian_integral = 0;
63-
vec2 blur_uv_offset = blur_info.blur_direction / blur_info.texture_size;
61+
f16vec4 total_color = f16vec4(0.0hf);
62+
float16_t gaussian_integral = 0.0hf;
63+
f16vec2 blur_uv_offset = blur_info.blur_direction / blur_info.texture_size;
6464

65-
for (float i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) {
66-
float gaussian = IPGaussian(i, blur_info.blur_sigma);
65+
for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) {
66+
float16_t gaussian = IPGaussian(i, blur_info.blur_sigma);
6767
gaussian_integral += gaussian;
6868
total_color +=
6969
gaussian *
@@ -75,11 +75,12 @@ void main() {
7575
frag_color = total_color / gaussian_integral;
7676

7777
#if ENABLE_ALPHA_MASK
78-
vec4 src_color = Sample(alpha_mask_sampler, // sampler
79-
v_src_texture_coords // texture coordinates
78+
f16vec4 src_color = Sample(alpha_mask_sampler, // sampler
79+
v_src_texture_coords // texture coordinates
8080
);
81-
float blur_factor = mask_info.inner_blur_factor * float(src_color.a > 0) +
82-
mask_info.outer_blur_factor * float(src_color.a == 0);
81+
float16_t blur_factor =
82+
mask_info.inner_blur_factor * float16_t(src_color.a > 0.0hf) +
83+
mask_info.outer_blur_factor * float16_t(src_color.a == 0.0hf);
8384

8485
frag_color = frag_color * blur_factor + src_color * mask_info.src_factor;
8586
#endif

impeller/entity/shaders/gaussian_blur/gaussian_blur.vert

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ in vec2 vertices;
1616
in vec2 texture_coords;
1717
in vec2 src_texture_coords;
1818

19-
out vec2 v_texture_coords;
20-
out vec2 v_src_texture_coords;
19+
out f16vec2 v_texture_coords;
20+
out f16vec2 v_src_texture_coords;
2121

2222
void main() {
2323
gl_Position = frame_info.mvp * vec4(vertices, 0.0, 1.0);
24-
v_texture_coords =
25-
IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale);
26-
v_src_texture_coords = IPRemapCoords(
27-
src_texture_coords, frame_info.alpha_mask_sampler_y_coord_scale);
24+
v_texture_coords = f16vec2(
25+
IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale));
26+
v_src_texture_coords = f16vec2(IPRemapCoords(
27+
src_texture_coords, frame_info.alpha_mask_sampler_y_coord_scale));
2828
}

impeller/entity/shaders/rrect_blur.frag

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,58 +6,61 @@
66
#include <impeller/types.glsl>
77

88
uniform FragInfo {
9-
vec4 color;
10-
float blur_sigma;
11-
vec2 rect_size;
12-
float corner_radius;
9+
f16vec4 color;
10+
f16vec2 rect_size;
11+
float16_t blur_sigma;
12+
float16_t corner_radius;
1313
}
1414
frag_info;
1515

16-
in vec2 v_position;
16+
in f16vec2 v_position;
1717

18-
out vec4 frag_color;
18+
out f16vec4 frag_color;
1919

2020
const int kSampleCount = 4;
2121

22-
float RRectDistance(vec2 sample_position, vec2 half_size) {
23-
vec2 space = abs(sample_position) - half_size + frag_info.corner_radius;
24-
return length(max(space, 0.0)) + min(max(space.x, space.y), 0.0) -
25-
frag_info.corner_radius;
22+
float16_t RRectDistance(f16vec2 sample_position, f16vec2 half_size) {
23+
f16vec2 space = abs(sample_position) - half_size + frag_info.corner_radius;
24+
return length(max(space, float16_t(0.0hf))) +
25+
min(max(space.x, space.y), float16_t(0.0hf)) - frag_info.corner_radius;
2626
}
2727

2828
/// Closed form unidirectional rounded rect blur mask solution using the
2929
/// analytical Gaussian integral (with approximated erf).
30-
float RRectShadowX(vec2 sample_position, vec2 half_size) {
30+
float16_t RRectShadowX(f16vec2 sample_position, f16vec2 half_size) {
3131
// Compute the X direction distance field (not incorporating the Y distance)
3232
// for the rounded rect.
33-
float space =
34-
min(0, half_size.y - frag_info.corner_radius - abs(sample_position.y));
35-
float rrect_distance =
33+
float16_t space =
34+
min(float16_t(0.0hf),
35+
half_size.y - frag_info.corner_radius - abs(sample_position.y));
36+
float16_t rrect_distance =
3637
half_size.x - frag_info.corner_radius +
37-
sqrt(max(0, frag_info.corner_radius * frag_info.corner_radius -
38-
space * space));
38+
sqrt(max(
39+
float16_t(0.0hf),
40+
frag_info.corner_radius * frag_info.corner_radius - space * space));
3941

4042
// Map the linear distance field to the approximate Gaussian integral.
41-
vec2 integral = IPVec2FastGaussianIntegral(
42-
sample_position.x + vec2(-rrect_distance, rrect_distance),
43+
f16vec2 integral = IPVec2FastGaussianIntegral(
44+
sample_position.x + f16vec2(-rrect_distance, rrect_distance),
4345
frag_info.blur_sigma);
4446
return integral.y - integral.x;
4547
}
4648

47-
float RRectShadow(vec2 sample_position, vec2 half_size) {
49+
float16_t RRectShadow(f16vec2 sample_position, f16vec2 half_size) {
4850
// Limit the sampling range to 3 standard deviations in the Y direction from
4951
// the kernel center to incorporate 99.7% of the color contribution.
50-
float half_sampling_range = frag_info.blur_sigma * 3;
52+
float16_t half_sampling_range = frag_info.blur_sigma * 3.0hf;
5153

52-
float begin_y = max(-half_sampling_range, sample_position.y - half_size.y);
53-
float end_y = min(half_sampling_range, sample_position.y + half_size.y);
54-
float interval = (end_y - begin_y) / kSampleCount;
54+
float16_t begin_y =
55+
max(-half_sampling_range, sample_position.y - half_size.y);
56+
float16_t end_y = min(half_sampling_range, sample_position.y + half_size.y);
57+
float16_t interval = (end_y - begin_y) / float16_t(kSampleCount);
5558

5659
// Sample the X blur kSampleCount times, weighted by the Gaussian function.
57-
float result = 0;
60+
float16_t result = 0.0hf;
5861
for (int sample_i = 0; sample_i < kSampleCount; sample_i++) {
59-
float y = begin_y + interval * (sample_i + 0.5);
60-
result += RRectShadowX(vec2(sample_position.x, sample_position.y - y),
62+
float16_t y = begin_y + interval * (float16_t(sample_i) + 0.5hf);
63+
result += RRectShadowX(f16vec2(sample_position.x, sample_position.y - y),
6164
half_size) *
6265
IPGaussian(y, frag_info.blur_sigma) * interval;
6366
}
@@ -68,10 +71,10 @@ float RRectShadow(vec2 sample_position, vec2 half_size) {
6871
void main() {
6972
frag_color = frag_info.color;
7073

71-
vec2 half_size = frag_info.rect_size * 0.5;
72-
vec2 sample_position = v_position - half_size;
74+
f16vec2 half_size = frag_info.rect_size * 0.5hf;
75+
f16vec2 sample_position = v_position - half_size;
7376

74-
if (frag_info.blur_sigma > 0) {
77+
if (frag_info.blur_sigma > 0.0hf) {
7578
frag_color *= RRectShadow(sample_position, half_size);
7679
} else {
7780
frag_color *= -RRectDistance(sample_position, half_size);

impeller/entity/shaders/rrect_blur.vert

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ frame_info;
1111

1212
in vec2 position;
1313

14-
out vec2 v_position;
14+
out f16vec2 v_position;
1515

1616
void main() {
1717
gl_Position = frame_info.mvp * vec4(position, 0.0, 1.0);
1818
// The fragment stage uses local coordinates to compute the blur.
19-
v_position = position;
19+
v_position = f16vec2(position);
2020
}

0 commit comments

Comments
 (0)