|
@ -1,4 +1,5 @@ |
|
|
#include "LinearTests.h" |
|
|
#include "LinearTests.h" |
|
|
|
|
|
#include <Common.h> |
|
|
|
|
|
|
|
|
Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program) |
|
|
Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program) |
|
|
{ |
|
|
{ |
|
@ -9,5 +10,45 @@ Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _prog |
|
|
|
|
|
|
|
|
void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b) |
|
|
void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b) |
|
|
{ |
|
|
{ |
|
|
//TODO: Implement
|
|
|
cl_int err = CL_SUCCESS; |
|
|
|
|
|
cl::Event _event; |
|
|
|
|
|
|
|
|
|
|
|
cl::Kernel kernel = cl::Kernel(*program, "largeMV", &err); |
|
|
|
|
|
CheckCLError(err); |
|
|
|
|
|
|
|
|
|
|
|
cl::Buffer ABuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n * m, NULL, &err); |
|
|
|
|
|
CheckCLError(err); |
|
|
|
|
|
queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n * m, A); |
|
|
|
|
|
|
|
|
|
|
|
cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err); |
|
|
|
|
|
CheckCLError(err); |
|
|
|
|
|
queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x); |
|
|
|
|
|
|
|
|
|
|
|
cl::Buffer YBuffer(*context, CL_MEM_WRITE_ONLY, sizeof(float) * n, NULL, &err); |
|
|
|
|
|
CheckCLError(err); |
|
|
|
|
|
queue->enqueueWriteBuffer(YBuffer, true, 0, sizeof(float) * n, y); |
|
|
|
|
|
|
|
|
|
|
|
cl::Buffer BBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n, NULL, &err); |
|
|
|
|
|
CheckCLError(err); |
|
|
|
|
|
queue->enqueueWriteBuffer(BBuffer, true, 0, sizeof(float) * n, b); |
|
|
|
|
|
|
|
|
|
|
|
//void largeMV(const int n, const int m, __global float* y, __global float* A,
|
|
|
|
|
|
// __global float* x, __global float* b, const int T, const int Z, __local float* Q)
|
|
|
|
|
|
int T = 2; // kimenet T hosszu darabokra
|
|
|
|
|
|
int Z = 2; // Bemenet Z hosszu darabokra
|
|
|
|
|
|
|
|
|
|
|
|
kernel.setArg(0, n); |
|
|
|
|
|
kernel.setArg(1, m); |
|
|
|
|
|
kernel.setArg(2, YBuffer); |
|
|
|
|
|
kernel.setArg(3, ABuffer); |
|
|
|
|
|
kernel.setArg(4, XBuffer); |
|
|
|
|
|
kernel.setArg(5, BBuffer); |
|
|
|
|
|
kernel.setArg(6, T); |
|
|
|
|
|
kernel.setArg(7, Z); |
|
|
|
|
|
kernel.setArg(8, sizeof(float) * T * Z, NULL); |
|
|
|
|
|
|
|
|
|
|
|
queue->enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(Z*T, 1), cl::NDRange(T * Z, 1), NULL, &_event); |
|
|
|
|
|
_event.wait(); |
|
|
|
|
|
|
|
|
|
|
|
queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(float) * n, y); |
|
|
} |
|
|
} |
|
|