diff --git a/Linear/Gauss.cpp b/Linear/Gauss.cpp
new file mode 100644
index 0000000..8f2901b
--- /dev/null
+++ b/Linear/Gauss.cpp
@@ -0,0 +1,78 @@
+#include "LinearTests.h"
+#include "cl.hpp"
+#include "Common.h"
+#include <iostream>
+
+// Stores the matrix dimensions and a non-owning pointer to the caller's
+// row-major buffer: m rows, each n floats long.
+Gauss::Gauss(int _n, int _m, float* _G)
+	: n(_n), m(_m), G(_G)
+{
+}
+
+// Blocking read of the device buffer back into the caller's matrix G.
+void Gauss::collect_results(cl::CommandQueue* queue)
+{
+	cl_int err = queue->enqueueReadBuffer(clInputBuffer, CL_TRUE, 0, sizeof(float) * n * m, G);
+	CheckCLError(err);
+}
+
+// Uploads G, binds the arguments of the "gaussian" kernel and enqueues it.
+// Event is passed to the kernel enqueue call.
+void Gauss::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
+{
+	cl_int err = CL_SUCCESS;
+	const size_t bytes = sizeof(float) * n * m;
+
+	cl::Kernel kernel = cl::Kernel(*program, "gaussian", &err);
+	CheckCLError(err);
+
+	// The kernel updates the matrix in place and collect_results() reads it
+	// back from this buffer, so the device must be allowed to write to it.
+	clInputBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, bytes, nullptr, &err);
+	CheckCLError(err);
+	err = queue->enqueueWriteBuffer(clInputBuffer, CL_TRUE, 0, bytes, G);
+	CheckCLError(err);
+
+	err = kernel.setArg(0, n);
+	CheckCLError(err);
+	err = kernel.setArg(1, m);
+	CheckCLError(err);
+	err = kernel.setArg(2, clInputBuffer);
+	CheckCLError(err);
+
+	// One work-item per column, all in one work-group: the kernel relies on
+	// barrier(), which only synchronises work-items of the same work-group.
+	err = queue->enqueueNDRangeKernel(kernel,
+		cl::NullRange,
+		cl::NDRange(n, 1),
+		cl::NDRange(n, 1),
+		nullptr,
+		Event);
+	CheckCLError(err);
+}
+
+// No CPU reference implementation exists yet.
+void Gauss::cpu_compute()
+{
+}
+
+// Prints the result matrix (m rows of n comma-separated values).
+// Nothing is compared, so the check always reports success.
+bool Gauss::validate_results()
+{
+	for (int i = 0; i < m; ++i) {
+		for (int j = 0; j < n; ++j) {
+			std::cout << G[j + i * n];
+			if (j < n - 1) std::cout << ", ";
+		}
+		std::cout << std::endl;
+	}
+	return true;
+}
+
+// Returns an empty description string.
+std::string Gauss::description()
+{
+	return std::string();
+}
diff --git a/Linear/Linear.cpp b/Linear/Linear.cpp
index 71515a5..eb73fe7 100644
--- a/Linear/Linear.cpp
+++ b/Linear/Linear.cpp
@@ -62,8 +62,19 @@ int main()
 	OpenCLHandler handler("../kernels/linear.cl");
 	//Jacobi j(MVType::ReduceMV);
 	//Jacobi j(MVType::SimpleMV);
-	Jacobi j(MVType::LargeMV);
-	handler.run_test(&j);
+	//Jacobi j(MVType::LargeMV);
+	//handler.run_test(&j);
+
+	int GAn = 4;
+	int GAm = 3;
+
+	float GA[] = { 2, 1, -1, 8,
+		-3, -1, 2, -11,
+		-2, 1, 2, -3 };
+
+	Gauss g(GAn, GAm, GA);
+
+	handler.run_test(&g);
 
 	return 0;
 }
diff --git a/Linear/Linear.vcxproj b/Linear/Linear.vcxproj
index
c72f0df..436a03b 100644 --- a/Linear/Linear.vcxproj +++ b/Linear/Linear.vcxproj @@ -71,6 +71,7 @@ + diff --git a/Linear/Linear.vcxproj.filters b/Linear/Linear.vcxproj.filters index 0c91e80..919c39a 100644 --- a/Linear/Linear.vcxproj.filters +++ b/Linear/Linear.vcxproj.filters @@ -33,6 +33,9 @@ Source Files + + Source Files + diff --git a/Linear/LinearTests.h b/Linear/LinearTests.h index e6f6bd5..43759bc 100644 --- a/Linear/LinearTests.h +++ b/Linear/LinearTests.h @@ -43,6 +43,21 @@ public: std::string description(); }; +class Gauss : public TestCase { /* Test case that runs the "gaussian" kernel on an m x n row-major matrix. */ +private: + int n; /* row length of G (number of columns) */ + int m; /* number of rows of G */ + float* G; /* caller-owned matrix; uploaded by gpu_compute() and overwritten by collect_results() */ + /* NOTE(review): Gauss.cpp also uses clInputBuffer, which is not declared here - presumably inherited from TestCase; confirm. */ +public: + Gauss(int _n, int _m, float* _G); + void collect_results(cl::CommandQueue* queue); + void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); + void cpu_compute(); /* currently an empty stub */ + bool validate_results(); /* prints G and always returns true */ + std::string description(); +}; + class Simple : public MatrixVectorMultiplier { public: Simple(cl::Context* context, cl::CommandQueue* queue, cl::Program* program); diff --git a/kernels/linear.cl b/kernels/linear.cl index ba085af..b0516d2 100644 --- a/kernels/linear.cl +++ b/kernels/linear.cl @@ -74,9 +74,6 @@ void reduceMV(const int n, const int M, __global float* y, __global float* A, __ // END FOR __kernel void largeMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b, const int T, const int Z, __local float* Q){ - int id = get_local_id(0); - // WG size: T*Z 4 - // ID 2 int t = get_local_id(0) / T; // sor 1 int z = get_local_id(0) % Z; // oszlop 0 @@ -117,7 +114,7 @@ void largeMV(const int n, const int m, __global float* y, __global float* A, __g // end for // (execute the 2nd loop of the sequential implemential in parallel) __kernel void gaussian(const int n, const int m, __global float* A){ - int i = get_global_id(0); + int j = get_global_id(0); int lid = get_local_id(0); /*if (i < n) { @@ -128,12 +125,28 @@ __kernel void gaussian(const int n, const int m, __global float* 
A){
 			}
 		}
 	}*/
-	for (size_t k = 1; k < n - 1; k++) {
-		float l = A[k * n + i] / A[k * n + k];
-		for (size_t j = k; j < n; j++) {
-			A[i * n + j] = A[i * n + j] - l * A[k * n + j];
+	// Gauss-Jordan elimination on the m x n matrix A; work-item j owns column j of every row.
+	// No pivoting is done: a zero on the diagonal produces inf/NaN.
+	for (size_t k = 0; k < m; k++) {
+		for (size_t i = 0; i < m; i++) {
+			if (k != i) {
+				float ratio = A[i * n + k] / A[k * n + k];
+				// Every work-item must read the multiplier before column k of row i is overwritten.
+				barrier(CLK_GLOBAL_MEM_FENCE);
+				A[i * n + j] = A[i * n + j] - ratio * A[k * n + j];
+				// Make the updated row visible before the next multiplier or pivot is read.
+				barrier(CLK_GLOBAL_MEM_FENCE);
+			}
 		}
+	}
+	// Scale row j by its pivot. Only work-item j touches row j from here on, and the
+	// barrier that ends the last elimination step has already made the row visible.
+	if (j < m) {
+		float ref = A[j * n + j];
+		for (size_t i = 0; i < n; i++) {
+			A[j * n + i] = A[j * n + i] / ref;
+		}
+	}
 }