From c90d6015e3c5405d3ac0c6d225d8a6041035f1b9 Mon Sep 17 00:00:00 2001 From: bgthompson Date: Sun, 3 Nov 2024 11:16:00 +1000 Subject: [PATCH] created a new exercise about vectors in zig, gave it number 109 --- build.zig | 7 ++ exercises/109_vectors.zig | 149 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 exercises/109_vectors.zig diff --git a/build.zig b/build.zig index a1f6029..e528552 100644 --- a/build.zig +++ b/build.zig @@ -1201,6 +1201,13 @@ const exercises = [_]Exercise{ .main_file = "108_labeled_switch.zig", .output = "The pull request has been merged.", }, + .{ + .main_file = "109_vectors.zig", + .output = + \\Max difference (old fn): 0.014 + \\Max difference (new fn): 0.014 + , + }, .{ .main_file = "999_the_end.zig", .output = diff --git a/exercises/109_vectors.zig b/exercises/109_vectors.zig new file mode 100644 index 0000000..9e4a2a0 --- /dev/null +++ b/exercises/109_vectors.zig @@ -0,0 +1,149 @@ +// So far in Ziglings, we've seen how for loops can be used to +// repeat calculations across an array in several ways. +// +// For loops are generally great for this kind of task, but +// sometimes they don't fully utilize the capabilities of the +// CPU. +// +// Most modern CPUs can execute instructions in which SEVERAL +// calculations are performed WITHIN registers at the SAME TIME. +// These are known as "single instruction, multiple data" (SIMD) +// instructions. SIMD instructions can make code significantly +// more performant. +// +// To see why, imagine we have a program in which we take the +// square root of four (changing) f32 floats. +// +// A simple compiler would take the program and produce machine code +// which calculates each square root sequentially. Most registers on +// modern CPUs have 64 bits, so we could imagine that each float moves +// into a 64-bit register, and the following happens four times: +// +// 32 bits 32 bits +// +-------------------+ +// register | 0 | x | +// +-------------------+ +// +// | +// [SQRT instruction] +// V +// +// +-------------------+ +// | 0 | sqrt(x) | +// +-------------------+ +// +// Notice that half of the register contains blank data to which +// nothing happened. What a waste! What if we were able to use +// that space instead? This is the idea at the core of SIMD. +// +// Most modern CPUs contain specialized registers with at least 128 bits +// for performing SIMD instructions. On a machine with 128-bit SIMD +// registers, a smart compiler would probably NOT issue four sqrt +// instructions as above, but instead pack the floats into a single +// 128-bit register, then execute a single "packed" sqrt +// instruction to do ALL the square root calculations at once. +// +// For example: +// +// +// 32 bits 32 bits 32 bits 32 bits +// +---------------------------------------+ +// register | 4.0 | 9.0 | 25.0 | 49.0 | +// +---------------------------------------+ +// +// | +// [SIMD SQRT instruction] +// V +// +// +---------------------------------------+ +// register | 2.0 | 3.0 | 5.0 | 7.0 | +// +---------------------------------------+ +// +// Pretty cool, right? +// +// Code with SIMD instructions is usually more performant than code +// without SIMD instructions. Zig cares a lot about performance, +// so it has built-in support for SIMD! It has a data structure that +// directly supports SIMD instructions: +// +// +-----------+ +// | Vectors | +// +-----------+ +// +// Operations performed on vectors in Zig will be done in parallel using +// SIMD instructions, whenever possible. +// +// Defining vectors in Zig is straightforwards. No library import is needed. +const v1 = @Vector(3, i32) { 1, 10, 100}; +const v2 = @Vector(3, f32) {2.0, 3.0, 5.0}; + +// Vectors support the same builtin operators as their underlying base types. +const v3 = v1 + v1; // { 2, 20, 200}; +const v4 = v2 * v2; // { 4.0, 9.0, 25.0}; + +// Intrinsics that apply to base types usually extend to vectors. +const v5 : @Vector(3, f32) = @floatFromInt(v3); // { 2.0, 20.0, 200.0} +const v6 = v4 - v5; // { 2.0, -11.0, -175.0} +const v7 = @abs(v6); // { 2.0, 11.0, 175.0} + +// We can make constant vectors, and reduce vectors. +const v8 : @Vector(4, u8) = @splat(2); // { 2, 2, 2, 2} +const v8_sum = @reduce(.Add, v8); // 8 +const v8_min = @reduce(.Min, v8); // 2 + +// Fixed-length arrays can be automatically assigned to vectors (and vice-versa). +const single_digit_primes = [4] i8 {2, 3, 5, 7}; +const prime_vector : @Vector(4, i8) = single_digit_primes; + +// Now let's use vectors to simplify and optimize some code! +// +// Ewa is writing a program in which they frequently want to compare +// two lists of four f32s. Ewa expects the lists to be similar, and +// wants to determine the largest pairwise difference between the lists. +// +// Ewa wrote the following function to figure this out. + +fn calcMaxPairwiseDiffOld( list1 : [4] f32, list2 : [4] f32) f32 { + var max_diff : f32 = 0; + for (list1, list2) |n1, n2| { + const abs_diff = @abs(n1 - n2); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + return max_diff; +} + +// Ewa heard about vectors in Zig, and started writing a new vector +// version of the function, but has got stuck! +// +// Help Ewa finish the vector version! The examples above should help. + +const Vec4 = @Vector(4, f32); +fn calcMaxPairwiseDiffNew( a : Vec4, b : Vec4) f32 { +// const abs_diff_vec = ???; + const abs_diff_vec = @abs(a - b); +// const max_diff = @reduce(???, abs_diff_vec); + const max_diff = @reduce(.Max, abs_diff_vec); + return max_diff; +} + +// Quite the simplification! We could even write the function in one line +// and it would still be readable. +// +// Since the entire function is now expressed in terms of vector operations, +// the Zig compiler will easily be able to compile it down to machine code +// which utilizes the all-powerful SIMD instructions and does a lot of the +// computation in parallel. + +const std = @import("std"); +const print = std.debug.print; + +pub fn main() void { + const l1 = [4] f32 { 3.141, 2.718, 0.577, 1.000}; + const l2 = [4] f32 { 3.154, 2.707, 0.591, 0.993}; + const mpd_old = calcMaxPairwiseDiffOld(l1, l2); + const mpd_new = calcMaxPairwiseDiffNew(l1, l2); + print("Max difference (old fn): {d: >5.3}\n", .{mpd_old}); + print("Max difference (new fn): {d: >5.3}\n", .{mpd_new}); +}