Browse Source

2b - reduce

master
Daniel Gyulai 3 years ago
parent
commit
a281001d34
  1. 6
      Linear/Large.cpp
  2. 3
      Linear/Linear.cpp
  3. 53
      Linear/Reduce.cpp
  4. 4
      Linear/Simple.cpp
  5. 24
      kernels/linear.cl

6
Linear/Large.cpp

@ -1,8 +1,10 @@
#include "LinearTests.h"
// Stores the OpenCL handles this test needs for later use in dewIt().
// The pointers are kept as-is (no copy is made here), so the caller must keep
// the pointed-to objects alive for as long as this instance is used.
// Parameters carry a leading underscore so they do not shadow the members
// they are assigned to.
Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
{
    context = _context;
    queue = _queue;
    program = _program;
}
void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)

3
Linear/Linear.cpp

@ -60,7 +60,8 @@ int main()
capi();
//cppapi();
OpenCLHandler handler("../kernels/linear.cl");
Jacobi j(MVType::SimpleMV);
Jacobi j(MVType::ReduceMV);
//Jacobi j(MVType::SimpleMV);
handler.run_test(&j);
return 0;
}

53
Linear/Reduce.cpp

@ -1,11 +1,56 @@
#include "LinearTests.h"
#include <Common.h>
// Stores the OpenCL handles used by Reduce::dewIt().
// The pointers are kept as-is (no copy is made here), so the caller must keep
// the pointed-to objects alive for as long as this instance is used.
// Parameters carry a leading underscore so they do not shadow the members
// they are assigned to.
Reduce::Reduce(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
{
    context = _context;
    queue = _queue;
    program = _program;
}
void Reduce::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
{
//TODO: Implement
}
cl_int err = CL_SUCCESS;
cl::Event _event;
cl::Kernel kernel = cl::Kernel(*program, "reduceMV", &err);
CheckCLError(err);
cl::Buffer ABuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n * m, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n * m, A);
cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x);
cl::Buffer YBuffer(*context, CL_MEM_WRITE_ONLY, sizeof(float) * n, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(YBuffer, true, 0, sizeof(float) * n, y);
cl::Buffer BBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(BBuffer, true, 0, sizeof(float) * n, b);
// void reduceMV
kernel.setArg(0, n); // (const int n,
kernel.setArg(1, m); // const int M,
kernel.setArg(2, YBuffer); // __global float* y,
kernel.setArg(3, ABuffer); // __global float* A,
kernel.setArg(4, XBuffer); // __global float* x,
kernel.setArg(5, BBuffer); // __global float* b,
kernel.setArg(6, sizeof(float) * n * m, NULL); // __local float* Q)
queue->enqueueNDRangeKernel(kernel,
cl::NullRange, // Indexek nem eloffszetelve
cl::NDRange(n, 1), // Minden elemet egy szál
cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
NULL, //
&_event);
_event.wait();
queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(float) * n, y);
}

4
Linear/Simple.cpp

@ -20,7 +20,7 @@ void Simple::dewIt(int n, int m, float* y, const float* A, const float* x, const
CheckCLError(err);
queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n *m, A);
cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err); // Ot kell kiirogatni
cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x);
@ -47,5 +47,5 @@ void Simple::dewIt(int n, int m, float* y, const float* A, const float* x, const
&_event);
_event.wait();
queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(int) * n, y);
queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(float) * n, y);
}

24
kernels/linear.cl

@ -11,7 +11,7 @@
// END IF
__kernel
void simpleMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b){
int i = get_local_id(0);
int i = get_global_id(0);
if (i < n) {
float yi = b[i];
@ -38,8 +38,26 @@ void simpleMV(const int n, const int m, __global float* y, __global float* A, __
//
// Matrix-vector product with a per-row parallel reduction: y = A * x + b.
//
// Launch layout expected by this kernel (1-D range):
//   - work-group id  -> row index i of A
//   - local id       -> column index j of A
//
//   n - number of rows of A (length of y and b)
//   M - number of columns of A (length of x)
//   y - output vector, one element written per work-group
//   A - matrix, indexed as A[i * M + j]
//   x - input vector
//   b - input vector added to the product
//   Q - local scratch buffer; must hold at least get_local_size(0) floats
__kernel
void reduceMV(const int n, const int M, __global float* y, __global float* A, __global float* x, __global float* b, __local float* Q){
    const int i = (int)get_group_id(0); // matrix row, work-group ID
    const int j = (int)get_local_id(0); // matrix column, ID within the work-group

    // The row index is the same for every work-item of a group, so the whole
    // group leaves together and no barrier below is left waiting.
    if (i >= n) {
        return;
    }

    // MAP
    // Q holds the element-wise product of row i of the matrix and the vector.
    // Work-items past the last column contribute 0 so they do not disturb the sum.
    Q[j] = (j < M) ? A[i * M + j] * x[j] : 0.0f;
    barrier(CLK_LOCAL_MEM_FENCE);

    // REDUCE
    // Sum the local vector Q into Q[0]. The active length is halved (rounding
    // up) in every step, so odd lengths keep their middle element and the
    // local size does not have to be a power of two.
    for (int len = (int)get_local_size(0); len > 1; ) {
        const int half = (len + 1) / 2;
        if (j + half < len) {
            Q[j] = Q[j] + Q[j + half];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        len = half;
    }

    // One work-item per group stores the finished dot product of row i.
    if (j == 0) {
        y[i] = Q[0] + b[i];
    }
}
// TODO: General solution for matrix-vector multiplication, every thread processes a chunk of the dot product and visits multiple rows of the result

Loading…
Cancel
Save