#include "LinearTests.h"
// NOTE(review): the original file had a second `#include` directive here whose
// target was lost; restore the intended header if the build requires it.

/// Stores the OpenCL handles this test runs against and sets the test's name.
///
/// @param _context  OpenCL context used to create the device buffers.
/// @param _queue    Command queue on which transfers and the kernel are enqueued.
/// @param _program  Program from which the "reduceMV" kernel is created.
///
/// The pointers are stored as-is; nothing here takes ownership of them.
Reduce::Reduce(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
{
    // Assigned in the body (not a member-initializer list) because the members
    // may be declared in a base class — their declaration is not visible here.
    context = _context;
    queue = _queue;
    program = _program;
    name = "Reduce";
}

/// Uploads the operands, runs the "reduceMV" kernel once, reads the result back
/// into `y`, and returns the kernel's execution time as reported by the event
/// profiling counters.
///
/// @param n  Number of work-items launched; also the element count of `y` and `b`.
/// @param m  Element count of `x`; `A` holds n * m floats.
/// @param y  Host buffer of n floats; uploaded before the launch and overwritten
///           with the device result afterwards.
/// @param A  Host buffer of n * m floats (read-only for the kernel).
/// @param x  Host buffer of m floats (read-only for the kernel).
/// @param b  Host buffer of n floats (read-only for the kernel).
/// @return   CL_PROFILING_COMMAND_END minus CL_PROFILING_COMMAND_START for the
///           kernel launch (device time counters; nanoseconds per the OpenCL spec).
///
/// NOTE(review): presumably the kernel computes y = A * x + b — the kernel source
/// is not visible here; confirm against the .cl file.
/// NOTE(review): profiling info is only available when the queue was created with
/// CL_QUEUE_PROFILING_ENABLE — confirm at the queue's creation site.
cl_ulong Reduce::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
{
    cl_int err = CL_SUCCESS;
    cl::Event _event;

    // Byte sizes of the operands, computed once and reused for buffers and transfers.
    const auto matrixBytes = sizeof(float) * n * m;
    const auto xBytes = sizeof(float) * m;
    const auto rowBytes = sizeof(float) * n;

    cl::Kernel kernel = cl::Kernel(*program, "reduceMV", &err);
    CheckCLError(err);

    // Device buffers; every upload is blocking, so the host pointers may be
    // reused as soon as each call returns.
    cl::Buffer ABuffer(*context, CL_MEM_READ_ONLY, matrixBytes, nullptr, &err);
    CheckCLError(err);
    err = queue->enqueueWriteBuffer(ABuffer, true, 0, matrixBytes, A);
    CheckCLError(err);

    cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, xBytes, nullptr, &err);
    CheckCLError(err);
    err = queue->enqueueWriteBuffer(XBuffer, true, 0, xBytes, x);
    CheckCLError(err);

    cl::Buffer YBuffer(*context, CL_MEM_WRITE_ONLY, rowBytes, nullptr, &err);
    CheckCLError(err);
    // The current contents of y are uploaded even though the kernel only has
    // write access to this buffer; kept to preserve the original behaviour.
    err = queue->enqueueWriteBuffer(YBuffer, true, 0, rowBytes, y);
    CheckCLError(err);

    cl::Buffer BBuffer(*context, CL_MEM_READ_ONLY, rowBytes, nullptr, &err);
    CheckCLError(err);
    err = queue->enqueueWriteBuffer(BBuffer, true, 0, rowBytes, b);
    CheckCLError(err);

    // Kernel signature, as recorded by the original author:
    //   void reduceMV(const int n, const int M,
    //                 __global float* y, __global float* A,
    //                 __global float* x, __global float* b,
    //                 __local float* Q)
    err = kernel.setArg(0, n);
    CheckCLError(err);
    err = kernel.setArg(1, m);
    CheckCLError(err);
    err = kernel.setArg(2, YBuffer);
    CheckCLError(err);
    err = kernel.setArg(3, ABuffer);
    CheckCLError(err);
    err = kernel.setArg(4, XBuffer);
    CheckCLError(err);
    err = kernel.setArg(5, BBuffer);
    CheckCLError(err);
    // A null pointer with a non-zero size requests that many bytes of __local
    // scratch memory for Q.
    // NOTE(review): n * m floats may exceed the device's local memory for large
    // inputs — verify against CL_DEVICE_LOCAL_MEM_SIZE.
    err = kernel.setArg(6, matrixBytes, nullptr);
    CheckCLError(err);

    err = queue->enqueueNDRangeKernel(kernel,
                                      cl::NullRange,      // indices are not offset
                                      cl::NDRange(n, 1),  // one work-item per element
                                      cl::NullRange,      // work-group size: automatic; if the launch fails,
                                                          // try 1024 and decrease until it starts
                                      nullptr,            // no events to wait on
                                      &_event);
    CheckCLError(err);

    err = _event.wait();
    CheckCLError(err);

    // Query the start and end counters separately; the elapsed time is their difference.
    const cl_ulong execStart = _event.getProfilingInfo<CL_PROFILING_COMMAND_START>(&err);
    CheckCLError(err);
    const cl_ulong execEnd = _event.getProfilingInfo<CL_PROFILING_COMMAND_END>(&err);
    CheckCLError(err);

    // Blocking read: y holds the device result when this returns.
    err = queue->enqueueReadBuffer(YBuffer, true, 0, rowBytes, y);
    CheckCLError(err);

    return (execEnd - execStart);
}