You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							58 lines
						
					
					
						
							1.6 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							58 lines
						
					
					
						
							1.6 KiB
						
					
					
				
								// Simple matrix-vector multiplication: y[i] = b[i] + sum_j A[j + i * m] * x[j].
								// Every work-item computes one complete dot product, i.e. one element of y.
								//
								// Parameters:
								//   n - number of result elements; work-items whose global id is >= n do nothing
								//   m - number of terms in each dot product (j runs over 0 .. m-1)
								//   y - output vector, written at index i
								//   A - matrix, read at index j + i * m
								//   x - input vector, read at index j
								//   b - vector added to the product, read at index i
								//
								// Algorithm:
								//   i := get_global_id(0)
								//   IF i < n THEN:
								//     yi := b[i]
								//     LOOP j := 0 .. m DO:
								//       yi += A[j + i * m] * x[j]
								//     END LOOP
								//     y[i] := yi
								//   END IF
								__kernel
								void simpleMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b){
									const int i = (int)get_global_id(0);

									// The global work size may exceed n; surplus work-items must not touch memory.
									if (i < n) {
										// Accumulate in a private variable so y[i] is written exactly once.
										float yi = b[i];
										for (int j = 0; j < m; ++j) {
											yi += A[j + i * m] * x[j];
										}
										y[i] = yi;
									}
								}
							 | 
						|
								
							 | 
						|
								// Matrix-vector multiplication with parallelization of the dot product:
								// y[i] = b[i] + sum_j A[i * M + j] * x[j].
								// The work-group with id i produces y[i]; the work-item with local id j
								// contributes the single product A[i * M + j] * x[j].
								//
								// Assumptions: M = 2^k, M <= maximum workgroup size
								// NOTE(review): presumably launched with a local work size of exactly M and
								// with Q sized for at least M floats - confirm against the host code.
								//
								// Parameters:
								//   n - not read by this kernel (the row index comes from get_group_id(0))
								//   M - number of terms in each dot product
								//   y - output vector, written at index i by the work-item with j == 0
								//   A - matrix, read at index i * M + j
								//   x - input vector, read at index j
								//   b - vector added to the product, read at index i
								//   Q - local scratch buffer holding the per-work-item products
								//
								// Algorithm:
								//   i = get_group_id(0)
								//   j = get_local_id(0)
								//   Q[j] := A[i * M + j] * x[j]
								//   BARRIER
								//   Sum scan on Q (reduction)
								//   IF j = 0 THEN:
								//     y[i] = Q[0] + b[i]
								__kernel
								void reduceMV(const int n, const int M, __global float* y, __global float* A, __global float* x, __global float* b, __local float* Q){
									const int i = (int)get_group_id(0);
									const int j = (int)get_local_id(0);

									Q[j] = A[i * M + j] * x[j];

									// Tree reduction: each step folds the upper half of the active range onto
									// the lower half. Halving down to 1 covers every element only because M is
									// a power of two. The barrier is outside the `if`, so every work-item of
									// the group reaches it the same number of times; it also orders the
									// initial write to Q before the first read.
									for (int stride = M / 2; stride > 0; stride /= 2) {
										barrier(CLK_LOCAL_MEM_FENCE);
										if (j < stride) {
											Q[j] += Q[j + stride];
										}
									}

									// Q[0] was last written by work-item 0 itself, so no further barrier is needed.
									if (j == 0) {
										y[i] = Q[0] + b[i];
									}
								}
							 | 
						|
								
							 | 
						|
								// TODO: General solution for matrix-vector multiplication. Every work-item
								// processes a chunk of the dot product and visits multiple rows of the result.
								//
								// Planned algorithm:
								//   t := get_local_id(0) / Z
								//   z := get_local_id(0) % Z
								//
								//   FOR i := t ; i < n ; i := i + T :
								//     Compute Q[t * Z + z] as shown in the lecture
								//     Sum scan on Q (reduction)
								//     IF z = 0 THEN:
								//       y[i] = Q[t * Z + 0] + b[i]
								//   END FOR
								//
								// NOTE(review): the kernel body is still empty, so launching it leaves y
								// untouched. The "compute Q" step is defined only in the lecture material,
								// which is not part of this file.
								__kernel
								void largeMV(const int n,
								             const int m,
								             __global float *y,
								             __global float *A,
								             __global float *x,
								             __global float *b,
								             const int T,
								             const int Z,
								             __local float *Q)
								{
								}
							 | 
						|
								
							 | 
						|
								// TODO: Gaussian elimination as shown in the lecture
								// (execute the 2nd loop of the sequential implementation in parallel).
								//
								// NOTE(review): the kernel body is still empty, so launching it leaves A
								// unchanged. The sequential reference implementation is not part of this file.
								__kernel
								void gaussian(const int n,
								              const int m,
								              __global float *A)
								{
								}
							 | 
						|
								
							 | 
						|
								
							 |