Vector Addition

Vector addition is the “Hello World” of GPU programming. It demonstrates how to perform element-wise operations on arrays in parallel.

Kernel Code

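The kernel takes two input vectors `a` and `b` and writes their element-wise sum to `c`. Each thread processes one element; the length `n` is passed in so that threads whose index falls past the end of the vectors do nothing.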

open Sarek

(* [vector_add a b c n] stores the element-wise sum of [a] and [b] in [c].
   Each thread handles the single element whose index equals its global
   thread ID; [n] is the number of elements to process. *)
let%kernel vector_add (a : float32 vector) (b : float32 vector) (c : float32 vector) (n : int32) =
  (* Global thread ID along dimension 0 selects this thread's element *)
  let tid = get_global_id 0 in

  (* More threads than elements may be launched (the grid is a whole number
     of blocks), so threads with tid >= n must not touch the vectors *)
  if tid < n then
    (* The elements are float32, so use the floating-point operator [+.];
       [+] is integer addition *)
    c.(tid) <- a.(tid) +. b.(tid)

Host Code

The host code initializes the data, selects a device, computes the launch dimensions, launches the kernel, and then checks one element of the result.

(* Host driver: fills two input vectors, launches [vector_add] on the default
   device, and prints one output element as a spot check. *)
let () =
  (* Number of elements in each vector *)
  let n = 1_000_000 in

  (* Two inputs and one output, all of length n *)
  let a = Vector.create Float32 n in
  let b = Vector.create Float32 n in
  let c = Vector.create Float32 n in

  (* a holds i and b holds 2i at index i, so the sum at index i is 3i *)
  for idx = 0 to pred n do
    Vector.set a idx (float_of_int idx);
    Vector.set b idx (float_of_int (idx * 2))
  done;

  (* Run on the default device *)
  let device = Device.get_default () in

  (* Launch geometry: round the block count up so that every element is
     covered even when n is not a multiple of the block size *)
  let ceil_div num den = (num + den - 1) / den in
  let threads_per_block = 256 in
  let grid = (ceil_div n threads_per_block, 1, 1) in
  let block = (threads_per_block, 1, 1) in

  (* Launch the kernel; the arguments follow the kernel's parameter order *)
  Execute.run vector_add ~device ~grid ~block
    [Vec a; Vec b; Vec c; Int32 (Int32.of_int n)];

  (* Spot check: index 10 should hold 10 + 20 *)
  Printf.printf "c[10] = %f (expected 30.0)\n" (Vector.get c 10)
