Skip to content

Commit 731a3eb

Browse files
author
Chris Boesch
committed
Merge pull request 'vectors-addition' (ratfactor#177) from vectors-addition into main
Reviewed-on: https://codeberg.org/ziglings/exercises/pulls/177 Thank you @BGThompson for this great exercise!
2 parents 150b3de + bfed660 commit 731a3eb

File tree

3 files changed

+167
-0
lines changed

3 files changed

+167
-0
lines changed

build.zig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,6 +1201,13 @@ const exercises = [_]Exercise{
12011201
.main_file = "108_labeled_switch.zig",
12021202
.output = "The pull request has been merged.",
12031203
},
1204+
.{
1205+
.main_file = "109_vectors.zig",
1206+
.output =
1207+
\\Max difference (old fn): 0.014
1208+
\\Max difference (new fn): 0.014
1209+
,
1210+
},
12041211
.{
12051212
.main_file = "999_the_end.zig",
12061213
.output =

exercises/109_vectors.zig

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// So far in Ziglings, we've seen how for loops can be used to
2+
// repeat calculations across an array in several ways.
3+
//
4+
// For loops are generally great for this kind of task, but
5+
// sometimes they don't fully utilize the capabilities of the
6+
// CPU.
7+
//
8+
// Most modern CPUs can execute instructions in which SEVERAL
9+
// calculations are performed WITHIN registers at the SAME TIME.
10+
// These are known as "single instruction, multiple data" (SIMD)
11+
// instructions. SIMD instructions can make code significantly
12+
// more performant.
13+
//
14+
// To see why, imagine we have a program in which we take the
15+
// square root of four (changing) f32 floats.
16+
//
17+
// A simple compiler would take the program and produce machine code
18+
// which calculates each square root sequentially. Most registers on
19+
// modern CPUs have 64 bits, so we could imagine that each float moves
20+
// into a 64-bit register, and the following happens four times:
21+
//
22+
// 32 bits 32 bits
23+
// +-------------------+
24+
// register | 0 | x |
25+
// +-------------------+
26+
//
27+
// |
28+
// [SQRT instruction]
29+
// V
30+
//
31+
// +-------------------+
32+
// | 0 | sqrt(x) |
33+
// +-------------------+
34+
//
35+
// Notice that half of the register contains blank data to which
36+
// nothing happened. What a waste! What if we were able to use
37+
// that space instead? This is the idea at the core of SIMD.
38+
//
39+
// Most modern CPUs contain specialized registers with at least 128 bits
40+
// for performing SIMD instructions. On a machine with 128-bit SIMD
41+
// registers, a smart compiler would probably NOT issue four sqrt
42+
// instructions as above, but instead pack the floats into a single
43+
// 128-bit register, then execute a single "packed" sqrt
44+
// instruction to do ALL the square root calculations at once.
45+
//
46+
// For example:
47+
//
48+
//
49+
// 32 bits 32 bits 32 bits 32 bits
50+
// +---------------------------------------+
51+
// register | 4.0 | 9.0 | 25.0 | 49.0 |
52+
// +---------------------------------------+
53+
//
54+
// |
55+
// [SIMD SQRT instruction]
56+
// V
57+
//
58+
// +---------------------------------------+
59+
// register | 2.0 | 3.0 | 5.0 | 7.0 |
60+
// +---------------------------------------+
61+
//
62+
// Pretty cool, right?
63+
//
64+
// Code with SIMD instructions is usually more performant than code
65+
// without SIMD instructions. Zig cares a lot about performance,
66+
// so it has built-in support for SIMD! It has a data structure that
67+
// directly supports SIMD instructions:
68+
//
69+
// +-----------+
70+
// | Vectors |
71+
// +-----------+
72+
//
73+
// Operations performed on vectors in Zig will be done in parallel using
74+
// SIMD instructions, whenever possible.
75+
//
76+
// Defining vectors in Zig is straightforward. No library import is needed.
77+
// A vector is declared with @Vector(length, element_type) and can be
// initialized with a braced literal, just like an array.
const v1 = @Vector(3, i32){ 1, 10, 100 };
const v2 = @Vector(3, f32){ 2.0, 3.0, 5.0 };

// Vectors support the same builtin operators as their underlying base types.
// Each operator is applied elementwise across the whole vector.
const v3 = v1 + v1; // { 2, 20, 200};
const v4 = v2 * v2; // { 4.0, 9.0, 25.0};

// Intrinsics that apply to base types usually extend to vectors.
const v5: @Vector(3, f32) = @floatFromInt(v3); // { 2.0, 20.0, 200.0}
const v6 = v4 - v5; // { 2.0, -11.0, -175.0}
const v7 = @abs(v6); // { 2.0, 11.0, 175.0}

// We can make constant vectors, and reduce vectors.
// @splat fills every lane with one scalar; @reduce folds all lanes
// into a single scalar using the given operation (.Add, .Min, .Max, ...).
const v8: @Vector(4, u8) = @splat(2); // { 2, 2, 2, 2}
const v8_sum = @reduce(.Add, v8); // 8
const v8_min = @reduce(.Min, v8); // 2

// Fixed-length arrays can be automatically assigned to vectors (and vice-versa).
const single_digit_primes = [4]i8{ 2, 3, 5, 7 };
const prime_vector: @Vector(4, i8) = single_digit_primes;
97+
98+
// Now let's use vectors to simplify and optimize some code!
99+
//
100+
// Ewa is writing a program in which they frequently want to compare
101+
// two lists of four f32s. Ewa expects the lists to be similar, and
102+
// wants to determine the largest pairwise difference between the lists.
103+
//
104+
// Ewa wrote the following function to figure this out.
105+
106+
/// Returns the largest absolute pairwise difference between two
/// four-element f32 lists, computed element-by-element with a plain
/// for loop (the pre-vector version).
fn calcMaxPairwiseDiffOld(list1: [4]f32, list2: [4]f32) f32 {
    var largest: f32 = 0;
    // Walk both lists in lockstep; keep the biggest |a - b| seen so far.
    for (list1, list2) |a, b| {
        largest = @max(largest, @abs(a - b));
    }
    return largest;
}
116+
117+
// Ewa heard about vectors in Zig, and started writing a new vector
118+
// version of the function, but has got stuck!
119+
//
120+
// Help Ewa finish the vector version! The examples above should help.
121+
122+
// A 4-wide f32 vector type; [4]f32 arrays coerce to it automatically.
const Vec4 = @Vector(4, f32);

// NOTE: the ??? placeholders below are the exercise — replace them!
fn calcMaxPairwiseDiffNew(a: Vec4, b: Vec4) f32 {
    // Hint: `-` works elementwise on vectors, and @abs extends to vectors too.
    const abs_diff_vec = ???;
    // Hint: @reduce folds a vector into one scalar; pick the right operation.
    const max_diff = @reduce(???, abs_diff_vec);
    return max_diff;
}
128+
129+
// Quite the simplification! We could even write the function in one line
130+
// and it would still be readable.
131+
//
132+
// Since the entire function is now expressed in terms of vector operations,
133+
// the Zig compiler will easily be able to compile it down to machine code
134+
// which utilizes the all-powerful SIMD instructions and does a lot of the
135+
// computation in parallel.
136+
137+
const std = @import("std");
138+
const print = std.debug.print;
139+
140+
/// Compares the loop-based and vector-based implementations on the
/// same pair of lists and prints the result of each.
pub fn main() void {
    const first = [4]f32{ 3.141, 2.718, 0.577, 1.000 };
    const second = [4]f32{ 3.154, 2.707, 0.591, 0.993 };
    // Both calls should agree: same inputs, same maximum pairwise difference.
    print("Max difference (old fn): {d: >5.3}\n", .{calcMaxPairwiseDiffOld(first, second)});
    print("Max difference (new fn): {d: >5.3}\n", .{calcMaxPairwiseDiffNew(first, second)});
}

patches/patches/109_vectors.patch

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
--- exercises/109_vectors.zig 2024-11-07 14:57:09.673383618 +0100
2+
+++ answers/109_vectors.zig 2024-11-07 14:22:59.069150138 +0100
3+
@@ -121,8 +121,8 @@
4+
5+
const Vec4 = @Vector(4, f32);
6+
fn calcMaxPairwiseDiffNew(a: Vec4, b: Vec4) f32 {
7+
- const abs_diff_vec = ???;
8+
- const max_diff = @reduce(???, abs_diff_vec);
9+
+ const abs_diff_vec = @abs(a - b);
10+
+ const max_diff = @reduce(.Max, abs_diff_vec);
11+
return max_diff;
12+
}
13+

0 commit comments

Comments
 (0)