Browse Source

Large almost working

master
Daniel Gyulai 3 years ago
parent
commit
9dd98fb4b0
  1. 2
      Linear/Jacobi.cpp
  2. 43
      Linear/Large.cpp
  3. 3
      Linear/Linear.cpp
  4. 36
      kernels/linear.cl

2
Linear/Jacobi.cpp

@ -88,6 +88,8 @@ void Jacobi::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Prog
{ {
MatrixVectorMultiplier* MVMultiplier = MethodFactory(type, context, queue, program); MatrixVectorMultiplier* MVMultiplier = MethodFactory(type, context, queue, program);
if (MVMultiplier != NULL) { if (MVMultiplier != NULL) {
int inputBuffer = 0; int inputBuffer = 0;
const int iterations = 20; const int iterations = 20;

43
Linear/Large.cpp

@ -1,4 +1,5 @@
#include "LinearTests.h" #include "LinearTests.h"
#include <Common.h>
Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program) Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
{ {
@ -9,5 +10,45 @@ Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _prog
void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b) void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
{ {
//TODO: Implement cl_int err = CL_SUCCESS;
cl::Event _event;
cl::Kernel kernel = cl::Kernel(*program, "largeMV", &err);
CheckCLError(err);
cl::Buffer ABuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n * m, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n * m, A);
cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x);
cl::Buffer YBuffer(*context, CL_MEM_WRITE_ONLY, sizeof(float) * n, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(YBuffer, true, 0, sizeof(float) * n, y);
cl::Buffer BBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n, NULL, &err);
CheckCLError(err);
queue->enqueueWriteBuffer(BBuffer, true, 0, sizeof(float) * n, b);
//void largeMV(const int n, const int m, __global float* y, __global float* A,
// __global float* x, __global float* b, const int T, const int Z, __local float* Q)
int T = 2; // kimenet T hosszu darabokra
int Z = 2; // Bemenet Z hosszu darabokra
kernel.setArg(0, n);
kernel.setArg(1, m);
kernel.setArg(2, YBuffer);
kernel.setArg(3, ABuffer);
kernel.setArg(4, XBuffer);
kernel.setArg(5, BBuffer);
kernel.setArg(6, T);
kernel.setArg(7, Z);
kernel.setArg(8, sizeof(float) * T * Z, NULL);
queue->enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(Z*T, 1), cl::NDRange(T * Z, 1), NULL, &_event);
_event.wait();
queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(float) * n, y);
} }

3
Linear/Linear.cpp

@ -60,8 +60,9 @@ int main()
capi(); capi();
//cppapi(); //cppapi();
OpenCLHandler handler("../kernels/linear.cl"); OpenCLHandler handler("../kernels/linear.cl");
Jacobi j(MVType::ReduceMV); //Jacobi j(MVType::ReduceMV);
//Jacobi j(MVType::SimpleMV); //Jacobi j(MVType::SimpleMV);
Jacobi j(MVType::LargeMV);
handler.run_test(&j); handler.run_test(&j);
return 0; return 0;
} }

36
kernels/linear.cl

@ -74,7 +74,43 @@ void reduceMV(const int n, const int M, __global float* y, __global float* A, __
// END FOR // END FOR
// y = A*x + b, where A is n x m (row-major), computed by ONE work-group of
// T*Z work-items. Each work-item owns position (t, z) of a T x Z tile:
// t selects the row inside the tile, z the column. Q is T*Z floats of
// __local scratch used to reduce the Z partial products of each row.
//
// Fixes over the previous version:
//  - the outer loop walked columns (i < m) but used i+t as the ROW index,
//    and the inner loop walked rows (j < n) but used j+z as the COLUMN
//    index; rows now run over n in steps of T, columns over m in steps of Z
//  - x was indexed by the row (x[i + t]) instead of the column
//  - the reduction across z was guarded by `t < s` instead of `z < s`
//  - t was derived as id / T (only correct when T == Z); it is id / Z
//  - added bounds checks so n, m need not be multiples of T, Z
//  - added the missing barrier before Q is overwritten by the next tile
__kernel
void largeMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b, const int T, const int Z, __local float* Q){
	int id = get_local_id(0);
	int t = id / Z; // row inside the T x Z tile
	int z = id % Z; // column inside the tile
	for (int i = 0; i < n; i += T) { // first row of the current tile
		int row = i + t;
		if (z == 0 && row < n) {
			y[row] = 0.0f;
		}
		for (int j = 0; j < m; j += Z) { // first column of the current tile
			int col = j + z;
			// Zero-pad out-of-range elements so every work-item still
			// executes the barriers below (barriers must be reached by
			// all work-items in the group).
			Q[t * Z + z] = (row < n && col < m) ? A[row * m + col] * x[col] : 0.0f;
			barrier(CLK_LOCAL_MEM_FENCE);
			// Tree reduction across z within each tile row.
			for (int s = Z / 2; s > 0; s >>= 1) {
				if (z < s) {
					Q[t * Z + z] += Q[t * Z + z + s];
				}
				barrier(CLK_LOCAL_MEM_FENCE);
			}
			if (z == 0 && row < n) {
				y[row] += Q[t * Z];
			}
			// Ensure the reduced value is consumed before the next tile
			// overwrites Q.
			barrier(CLK_LOCAL_MEM_FENCE);
		}
		if (z == 0 && row < n) {
			y[row] += b[row];
		}
	}
}
// TODO: Gaussian elimination as shown in the lecture // TODO: Gaussian elimination as shown in the lecture

Loading…
Cancel
Save