From c90d6015e3c5405d3ac0c6d225d8a6041035f1b9 Mon Sep 17 00:00:00 2001 From: bgthompson Date: Sun, 3 Nov 2024 11:16:00 +1000 Subject: [PATCH 1/5] created a new exercise about vectors in zig, gave it number 109 --- build.zig | 7 ++ exercises/109_vectors.zig | 149 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 exercises/109_vectors.zig diff --git a/build.zig b/build.zig index a1f6029..e528552 100644 --- a/build.zig +++ b/build.zig @@ -1201,6 +1201,13 @@ const exercises = [_]Exercise{ .main_file = "108_labeled_switch.zig", .output = "The pull request has been merged.", }, + .{ + .main_file = "109_vectors.zig", + .output = + \\Max difference (old fn): 0.014 + \\Max difference (new fn): 0.014 + , + }, .{ .main_file = "999_the_end.zig", .output = diff --git a/exercises/109_vectors.zig b/exercises/109_vectors.zig new file mode 100644 index 0000000..9e4a2a0 --- /dev/null +++ b/exercises/109_vectors.zig @@ -0,0 +1,149 @@ +// So far in Ziglings, we've seen how for loops can be used to +// repeat calculations across an array in several ways. +// +// For loops are generally great for this kind of task, but +// sometimes they don't fully utilize the capabilities of the +// CPU. +// +// Most modern CPUs can execute instructions in which SEVERAL +// calculations are performed WITHIN registers at the SAME TIME. +// These are known as "single instruction, multiple data" (SIMD) +// instructions. SIMD instructions can make code significantly +// more performant. +// +// To see why, imagine we have a program in which we take the +// square root of four (changing) f32 floats. +// +// A simple compiler would take the program and produce machine code +// which calculates each square root sequentially. Most registers on +// modern CPUs have 64 bits, so we could imagine that each float moves +// into a 64-bit register, and the following happens four times: +// +// 32 bits 32 bits +// +-------------------+ +// register | 0 | x | +// +-------------------+ +// +// | +// [SQRT instruction] +// V +// +// +-------------------+ +// | 0 | sqrt(x) | +// +-------------------+ +// +// Notice that half of the register contains blank data to which +// nothing happened. What a waste! What if we were able to use +// that space instead? This is the idea at the core of SIMD. +// +// Most modern CPUs contain specialized registers with at least 128 bits +// for performing SIMD instructions. On a machine with 128-bit SIMD +// registers, a smart compiler would probably NOT issue four sqrt +// instructions as above, but instead pack the floats into a single +// 128-bit register, then execute a single "packed" sqrt +// instruction to do ALL the square root calculations at once. +// +// For example: +// +// +// 32 bits 32 bits 32 bits 32 bits +// +---------------------------------------+ +// register | 4.0 | 9.0 | 25.0 | 49.0 | +// +---------------------------------------+ +// +// | +// [SIMD SQRT instruction] +// V +// +// +---------------------------------------+ +// register | 2.0 | 3.0 | 5.0 | 7.0 | +// +---------------------------------------+ +// +// Pretty cool, right? +// +// Code with SIMD instructions is usually more performant than code +// without SIMD instructions. Zig cares a lot about performance, +// so it has built-in support for SIMD! It has a data structure that +// directly supports SIMD instructions: +// +// +-----------+ +// | Vectors | +// +-----------+ +// +// Operations performed on vectors in Zig will be done in parallel using +// SIMD instructions, whenever possible. +// +// Defining vectors in Zig is straightforwards. No library import is needed. +const v1 = @Vector(3, i32) { 1, 10, 100}; +const v2 = @Vector(3, f32) {2.0, 3.0, 5.0}; + +// Vectors support the same builtin operators as their underlying base types. +const v3 = v1 + v1; // { 2, 20, 200}; +const v4 = v2 * v2; // { 4.0, 9.0, 25.0}; + +// Intrinsics that apply to base types usually extend to vectors. +const v5 : @Vector(3, f32) = @floatFromInt(v3); // { 2.0, 20.0, 200.0} +const v6 = v4 - v5; // { 2.0, -11.0, -175.0} +const v7 = @abs(v6); // { 2.0, 11.0, 175.0} + +// We can make constant vectors, and reduce vectors. +const v8 : @Vector(4, u8) = @splat(2); // { 2, 2, 2, 2} +const v8_sum = @reduce(.Add, v8); // 8 +const v8_min = @reduce(.Min, v8); // 2 + +// Fixed-length arrays can be automatically assigned to vectors (and vice-versa). +const single_digit_primes = [4] i8 {2, 3, 5, 7}; +const prime_vector : @Vector(4, i8) = single_digit_primes; + +// Now let's use vectors to simplify and optimize some code! +// +// Ewa is writing a program in which they frequently want to compare +// two lists of four f32s. Ewa expects the lists to be similar, and +// wants to determine the largest pairwise difference between the lists. +// +// Ewa wrote the following function to figure this out. + +fn calcMaxPairwiseDiffOld( list1 : [4] f32, list2 : [4] f32) f32 { + var max_diff : f32 = 0; + for (list1, list2) |n1, n2| { + const abs_diff = @abs(n1 - n2); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + return max_diff; +} + +// Ewa heard about vectors in Zig, and started writing a new vector +// version of the function, but has got stuck! +// +// Help Ewa finish the vector version! The examples above should help. + +const Vec4 = @Vector(4, f32); +fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { +// const abs_diff_vec = ???; + const abs_diff_vec = @abs(a - b); +// const max_diff = @reduce(???, abs_diff_vec); + const max_diff = @reduce(.Max, abs_diff_vec); + return max_diff; +} + +// Quite the simplification! We could even write the function in one line +// and it would still be readable. +// +// Since the entire function is now expressed in terms of vector operations, +// the Zig compiler will easily be able to compile it down to machine code +// which utilizes the all-powerful SIMD instructions and does a lot of the +// computation in parallel. + +const std = @import("std"); +const print = std.debug.print; + +pub fn main() void { + const l1 = [4] f32 { 3.141, 2.718, 0.577, 1.000}; + const l2 = [4] f32 { 3.154, 2.707, 0.591, 0.993}; + const mpd_old = calcMaxPairwiseDiffOld(l1, l2); + const mpd_new = calcMaxPairwiseDiffNew(l1, l2); + print("Max difference (old fn): {d: >5.3}\n", .{mpd_old}); + print("Max difference (new fn): {d: >5.3}\n", .{mpd_new}); +} From 75e5e534979d95087709b0cccf4fdf64e74d43d3 Mon Sep 17 00:00:00 2001 From: bgthompson Date: Sun, 3 Nov 2024 11:18:18 +1000 Subject: [PATCH 2/5] removed commented solution lines in vectors exercise, added ??? into lines instead --- exercises/109_vectors.zig | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/exercises/109_vectors.zig b/exercises/109_vectors.zig index 9e4a2a0..106937e 100644 --- a/exercises/109_vectors.zig +++ b/exercises/109_vectors.zig @@ -121,10 +121,8 @@ fn calcMaxPairwiseDiffOld( list1 : [4] f32, list2 : [4] f32) f32 { const Vec4 = @Vector(4, f32); fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { -// const abs_diff_vec = ???; - const abs_diff_vec = @abs(a - b); -// const max_diff = @reduce(???, abs_diff_vec); - const max_diff = @reduce(.Max, abs_diff_vec); + const abs_diff_vec = ???; + const max_diff = @reduce(???, abs_diff_vec); return max_diff; } From 8cce587d3ba60d8073a38a87a71caa5bfaa859c6 Mon Sep 17 00:00:00 2001 From: bgthompson Date: Thu, 7 Nov 2024 13:31:32 +1000 Subject: [PATCH 3/5] added patch file for 109_vectors --- patches/patches/109_vectors.patch | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 patches/patches/109_vectors.patch diff --git a/patches/patches/109_vectors.patch b/patches/patches/109_vectors.patch new file mode 100644 index 0000000..ec2189e --- /dev/null +++ b/patches/patches/109_vectors.patch @@ -0,0 +1,13 @@ +--- exercises/109_vectors.zig 2024-11-03 11:17:00.928652000 +1000 ++++ answers/109_vectors.zig 2024-11-07 13:11:23.838667200 +1000 +@@ -121,8 +121,8 @@ + + const Vec4 = @Vector(4, f32); + fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { +- const abs_diff_vec = ???; +- const max_diff = @reduce(???, abs_diff_vec); ++ const abs_diff_vec = @abs(a - b); ++ const max_diff = @reduce(.Max, abs_diff_vec); + return max_diff; + } + From 46e8fc0b614bd3187ab6b2c6b4cc83996fb8bf56 Mon Sep 17 00:00:00 2001 From: bgthompson Date: Thu, 7 Nov 2024 13:42:53 +1000 Subject: [PATCH 4/5] line ending format patch attempt --- patches/patches/109_vectors.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patches/patches/109_vectors.patch b/patches/patches/109_vectors.patch index ec2189e..4b11da9 100644 --- a/patches/patches/109_vectors.patch +++ b/patches/patches/109_vectors.patch @@ -6,7 +6,7 @@ fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { - const abs_diff_vec = ???; - const max_diff = @reduce(???, abs_diff_vec); -+ const abs_diff_vec = @abs(a - b); ++ const abs_diff_vec = @abs( a - b ); + const max_diff = @reduce(.Max, abs_diff_vec); return max_diff; } From bfed6600205504a3fd2e3d3922ca5da6995224a7 Mon Sep 17 00:00:00 2001 From: Chris Boesch Date: Thu, 7 Nov 2024 15:01:59 +0100 Subject: [PATCH 5/5] Fixed formating, created patch file. --- exercises/109_vectors.zig | 42 +++++++++++++++---------------- patches/patches/109_vectors.patch | 8 +++--- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/exercises/109_vectors.zig b/exercises/109_vectors.zig index 106937e..96892ca 100644 --- a/exercises/109_vectors.zig +++ b/exercises/109_vectors.zig @@ -10,16 +10,16 @@ // These are known as "single instruction, multiple data" (SIMD) // instructions. SIMD instructions can make code significantly // more performant. -// +// // To see why, imagine we have a program in which we take the // square root of four (changing) f32 floats. -// +// // A simple compiler would take the program and produce machine code // which calculates each square root sequentially. Most registers on // modern CPUs have 64 bits, so we could imagine that each float moves // into a 64-bit register, and the following happens four times: // -// 32 bits 32 bits +// 32 bits 32 bits // +-------------------+ // register | 0 | x | // +-------------------+ @@ -35,7 +35,7 @@ // Notice that half of the register contains blank data to which // nothing happened. What a waste! What if we were able to use // that space instead? This is the idea at the core of SIMD. -// +// // Most modern CPUs contain specialized registers with at least 128 bits // for performing SIMD instructions. On a machine with 128-bit SIMD // registers, a smart compiler would probably NOT issue four sqrt @@ -50,11 +50,11 @@ // +---------------------------------------+ // register | 4.0 | 9.0 | 25.0 | 49.0 | // +---------------------------------------+ -// +// // | // [SIMD SQRT instruction] // V -// +// // +---------------------------------------+ // register | 2.0 | 3.0 | 5.0 | 7.0 | // +---------------------------------------+ @@ -74,26 +74,26 @@ // SIMD instructions, whenever possible. // // Defining vectors in Zig is straightforwards. No library import is needed. -const v1 = @Vector(3, i32) { 1, 10, 100}; -const v2 = @Vector(3, f32) {2.0, 3.0, 5.0}; +const v1 = @Vector(3, i32){ 1, 10, 100 }; +const v2 = @Vector(3, f32){ 2.0, 3.0, 5.0 }; // Vectors support the same builtin operators as their underlying base types. const v3 = v1 + v1; // { 2, 20, 200}; const v4 = v2 * v2; // { 4.0, 9.0, 25.0}; // Intrinsics that apply to base types usually extend to vectors. -const v5 : @Vector(3, f32) = @floatFromInt(v3); // { 2.0, 20.0, 200.0} -const v6 = v4 - v5; // { 2.0, -11.0, -175.0} -const v7 = @abs(v6); // { 2.0, 11.0, 175.0} +const v5: @Vector(3, f32) = @floatFromInt(v3); // { 2.0, 20.0, 200.0} +const v6 = v4 - v5; // { 2.0, -11.0, -175.0} +const v7 = @abs(v6); // { 2.0, 11.0, 175.0} // We can make constant vectors, and reduce vectors. -const v8 : @Vector(4, u8) = @splat(2); // { 2, 2, 2, 2} -const v8_sum = @reduce(.Add, v8); // 8 -const v8_min = @reduce(.Min, v8); // 2 +const v8: @Vector(4, u8) = @splat(2); // { 2, 2, 2, 2} +const v8_sum = @reduce(.Add, v8); // 8 +const v8_min = @reduce(.Min, v8); // 2 // Fixed-length arrays can be automatically assigned to vectors (and vice-versa). -const single_digit_primes = [4] i8 {2, 3, 5, 7}; -const prime_vector : @Vector(4, i8) = single_digit_primes; +const single_digit_primes = [4]i8{ 2, 3, 5, 7 }; +const prime_vector: @Vector(4, i8) = single_digit_primes; // Now let's use vectors to simplify and optimize some code! // @@ -103,8 +103,8 @@ const prime_vector : @Vector(4, i8) = single_digit_primes; // // Ewa wrote the following function to figure this out. -fn calcMaxPairwiseDiffOld( list1 : [4] f32, list2 : [4] f32) f32 { - var max_diff : f32 = 0; +fn calcMaxPairwiseDiffOld(list1: [4]f32, list2: [4]f32) f32 { + var max_diff: f32 = 0; for (list1, list2) |n1, n2| { const abs_diff = @abs(n1 - n2); if (abs_diff > max_diff) { @@ -120,7 +120,7 @@ fn calcMaxPairwiseDiffOld( list1 : [4] f32, list2 : [4] f32) f32 { // Help Ewa finish the vector version! The examples above should help. const Vec4 = @Vector(4, f32); -fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { +fn calcMaxPairwiseDiffNew(a: Vec4, b: Vec4) f32 { const abs_diff_vec = ???; const max_diff = @reduce(???, abs_diff_vec); return max_diff; @@ -138,8 +138,8 @@ const std = @import("std"); const print = std.debug.print; pub fn main() void { - const l1 = [4] f32 { 3.141, 2.718, 0.577, 1.000}; - const l2 = [4] f32 { 3.154, 2.707, 0.591, 0.993}; + const l1 = [4]f32{ 3.141, 2.718, 0.577, 1.000 }; + const l2 = [4]f32{ 3.154, 2.707, 0.591, 0.993 }; const mpd_old = calcMaxPairwiseDiffOld(l1, l2); const mpd_new = calcMaxPairwiseDiffNew(l1, l2); print("Max difference (old fn): {d: >5.3}\n", .{mpd_old}); diff --git a/patches/patches/109_vectors.patch b/patches/patches/109_vectors.patch index 4b11da9..bf18cc0 100644 --- a/patches/patches/109_vectors.patch +++ b/patches/patches/109_vectors.patch @@ -1,12 +1,12 @@ ---- exercises/109_vectors.zig 2024-11-03 11:17:00.928652000 +1000 -+++ answers/109_vectors.zig 2024-11-07 13:11:23.838667200 +1000 +--- exercises/109_vectors.zig 2024-11-07 14:57:09.673383618 +0100 ++++ answers/109_vectors.zig 2024-11-07 14:22:59.069150138 +0100 @@ -121,8 +121,8 @@ const Vec4 = @Vector(4, f32); - fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { + fn calcMaxPairwiseDiffNew(a: Vec4, b: Vec4) f32 { - const abs_diff_vec = ???; - const max_diff = @reduce(???, abs_diff_vec); -+ const abs_diff_vec = @abs( a - b ); ++ const abs_diff_vec = @abs(a - b); + const max_diff = @reduce(.Max, abs_diff_vec); return max_diff; }