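// BMEVIIIMB01
// NOTE: the following lines are repository web-view residue captured together
// with the file; they are kept as comments so the kernel source compiles.
// You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
//
// 58 lines
// 1.6 KiB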

// Simple matrix-vector multiplication: y = A * x + b.
// Every work-item computes one complete dot product, i.e. one element of y.
//
//   n - number of rows of A (length of y and b)
//   m - number of columns of A (length of x)
//   y - output vector, n elements
//   A - input matrix, element (i, j) is read from A[j + i * m]
//   x - input vector, m elements
//   b - input vector added to the product, n elements
//
// Work-items whose global id is >= n do nothing, so the global work size may
// be rounded up past n.
__kernel
void simpleMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b){
  const int i = (int)get_global_id(0);
  if(i >= n){
    return;
  }

  // Accumulate in a private variable and write the result once.
  float yi = b[i];
  for(int j = 0; j < m; ++j){
    yi += A[j + i * m] * x[j];
  }
  y[i] = yi;
}
// Matrix-vector multiplication with a parallel dot product: y = A * x + b.
// Work-group i produces y[i]; work-item j of that group contributes the single
// product A[i * M + j] * x[j], and the products are summed by a tree reduction
// in local memory.
//
// Assumptions (not checked by the kernel):
//   - M = 2^k and M >= 1
//   - the work-group size equals M (so M <= maximum work-group size)
//   - Q provides room for at least M floats
//
//   n - number of rows of A (length of y and b)
//   M - number of columns of A (length of x)
//   y - output vector, n elements
//   A - input matrix, element (i, j) is read from A[i * M + j]
//   x - input vector, M elements
//   b - input vector added to the product, n elements
//   Q - local scratch buffer used for the reduction
__kernel
void reduceMV(const int n, const int M, __global float* y, __global float* A, __global float* x, __global float* b, __local float* Q){
  const int i = (int)get_group_id(0);
  const int j = (int)get_local_id(0);

  // i is identical for every work-item of a work-group, so the whole group
  // leaves together and no work-item is left waiting at a barrier below.
  if(i >= n){
    return;
  }

  if(j < M){
    Q[j] = A[i * M + j] * x[j];
  }

  // Tree reduction: each step folds the upper half of the live range onto the
  // lower half. The barrier at the top of the step makes the values written
  // by the previous step (or by the load above) visible to the whole group.
  for(int stride = M / 2; stride > 0; stride >>= 1){
    barrier(CLK_LOCAL_MEM_FENCE);
    if(j < stride){
      Q[j] += Q[j + stride];
    }
  }

  // Work-item 0 performed the last addition itself, so Q[0] is already final.
  if(j == 0){
    y[i] = Q[0] + b[i];
  }
}
// General matrix-vector multiplication: y = A * x + b.
// The work-group is viewed as T row slots of Z work-items each:
//   t := get_local_id(0) / Z   (row slot)
//   z := get_local_id(0) % Z   (position inside the slot)
// Slot t handles rows t, t + T, t + 2T, ...; inside a slot, work-item z sums
// the products of columns z, z + Z, z + 2Z, ... and the Z partial sums are
// then reduced in local memory.
//
// Assumptions (not checked by the kernel):
//   - T >= 1 and Z >= 1
//   - Q provides room for at least T * Z floats
//   - NOTE(review): the pseudocode only uses get_local_id, so this presumably
//     runs as a single work-group of T * Z work-items; confirm against the
//     host code.
//
//   n - number of rows of A (length of y and b)
//   m - number of columns of A (length of x)
//   y - output vector, n elements
//   A - input matrix, element (i, j) is read from A[j + i * m]
//   x - input vector, m elements
//   b - input vector added to the product, n elements
//   T - number of rows processed concurrently
//   Z - number of work-items cooperating on one row
//   Q - local scratch buffer, indexed as Q[t * Z + z]
__kernel
void largeMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b, const int T, const int Z, __local float* Q){
  const int lid = (int)get_local_id(0);
  const int t = lid / Z;
  const int z = lid % Z;

  // Work-items beyond the T * Z layout own no slot in Q; they only take part
  // in the barriers.
  const bool hasSlot = t < T;

  // The trip count depends only on n and T, so every work-item executes the
  // same number of iterations and therefore the same number of barriers.
  for(int rowBase = 0; rowBase < n; rowBase += T){
    const int i = rowBase + t;
    const bool rowValid = hasSlot && i < n;

    // Partial dot product over this work-item's share of the columns.
    float partial = 0.0f;
    if(rowValid){
      for(int j = z; j < m; j += Z){
        partial += A[j + i * m] * x[j];
      }
    }

    // Wait until the previous iteration has finished reading Q before
    // overwriting it.
    barrier(CLK_LOCAL_MEM_FENCE);
    if(hasSlot){
      Q[t * Z + z] = partial;
    }

    // Reduction inside each slot. 'width' is the number of live partial sums;
    // each step folds the upper part onto the lower part. Rounding 'half' up
    // keeps the middle element when width is odd, so Z need not be a power
    // of two.
    for(int width = Z; width > 1; ){
      const int half = (width + 1) / 2;
      barrier(CLK_LOCAL_MEM_FENCE);
      if(hasSlot && z + half < width){
        Q[t * Z + z] += Q[t * Z + z + half];
      }
      width = half;
    }

    // Work-item z == 0 performed the last addition itself, so its slot
    // already holds the complete sum.
    if(rowValid && z == 0){
      y[i] = Q[t * Z + 0] + b[i];
    }
  }
}
// TODO: Gaussian elimination as shown in the lecture
// (execute the 2nd loop of the sequential implementation in parallel)
//
// NOTE(review): this kernel is still an empty stub; launching it leaves A
// unchanged.
// NOTE(review): which of n and m is the row count and which the column count
// of A, and how A is laid out in memory, is not visible in this file; confirm
// against the host code and the lecture material before implementing.
__kernel void gaussian(const int n, const int m, __global float* A){
}