|
|
@ -115,27 +115,15 @@ void largeMV(const int n, const int m, __global float* y, __global float* A, __g |
|
|
|
// (execute the 2nd loop of the sequential implementation in parallel) |
|
|
|
__kernel void gaussian(const int n, const int m, __global float* A){ |
|
|
|
int j = get_global_id(0); |
|
|
|
int lid = get_local_id(0); |
|
|
|
|
|
|
|
/*if (i < n) { |
|
|
|
for (size_t k = 1; k < n - 1; k++) { |
|
|
|
int l = A[k * i] / A[k * k]; |
|
|
|
for (size_t j = k; k < n; j++) { |
|
|
|
A[i * j] = A[i * j] - l * A[k * j]; |
|
|
|
} |
|
|
|
} |
|
|
|
}*/ |
|
|
|
for (size_t k = 0; k < m; k++) { |
|
|
|
for (size_t i = 0; i < m; i++) { |
|
|
|
if (k != i) { |
|
|
|
float ratio = A[i * n + k] / A[k * n + k]; |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
//for (size_t j = 0; j < n; j++) { |
|
|
|
A[i * n + j] = A[i * n + j] - ratio * A[k * n + j]; |
|
|
|
//} |
|
|
|
A[i * n + j] = A[i * n + j] - ratio * A[k * n + j]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
if (j < m) { |
|
|
@ -145,8 +133,5 @@ __kernel void gaussian(const int n, const int m, __global float* A){ |
|
|
|
A[j * n + i] = A[j * n + i] / ref; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//A[i] = i; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|