11 changed files with 354 additions and 13328 deletions
@ -1,203 +0,0 @@ |
|||||
#pragma once |
|
||||
|
|
||||
#include <cstdio> |
|
||||
#include <cstdlib> |
|
||||
#include <iostream> |
|
||||
#include <fstream> |
|
||||
#include <sstream> |
|
||||
|
|
||||
#include "cl.hpp" |
|
||||
|
|
||||
#pragma warning( disable : 4996 ) |
|
||||
|
|
||||
// Waits for `event` to complete and prints its profiled start/end timestamps
// and the elapsed time in milliseconds to stdout.
//
// The profiling counters are reported by OpenCL as device timestamps
// (nanoseconds per the OpenCL specification), hence the division by 1e6.
//
// Errors are reported on stderr. If no event was supplied, or if either
// profiling query fails, nothing is printed to stdout: the timestamps would
// otherwise be uninitialized garbage.
void printTimeStats(cl_event event)
{
    if (event == NULL)
    {
        std::cerr << "No event object returned!" << std::endl;
        return; // nothing to wait for or to query
    }

    clWaitForEvents(1, &event);

    cl_ulong execStart = 0;
    cl_ulong execEnd = 0;
    bool haveTimes = true;

    cl_int err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
                                         sizeof(cl_ulong), &execStart, NULL);
    if (err != CL_SUCCESS)
    {
        std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" << err << "]." << std::endl;
        haveTimes = false;
    }

    err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                  sizeof(cl_ulong), &execEnd, NULL);
    if (err != CL_SUCCESS)
    {
        std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" << err << "]." << std::endl;
        haveTimes = false;
    }

    if (!haveTimes)
    {
        return; // both failures have been reported; do not print bogus timings
    }

    std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl;
}
|
||||
|
|
||||
|
|
||||
// Writes `data` (tightly packed 8-bit RGB, width * height * 3 bytes) to
// `filename` as an uncompressed 24-bit true-color TGA image.
//
// - The 18-byte header carries no image ID and no color map; width and height
//   are stored as 16-bit little-endian values, so only their low 16 bits are
//   recorded.
// - The image descriptor is 0x20 and rows are emitted starting from the LAST
//   row of `data` and ending with the first.
// - Each pixel is written in B, G, R order.
//
// Terminates the process with EXIT_FAILURE if the file cannot be created.
// The file is closed before returning so that the output is flushed and the
// handle is not leaked.
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height)
{
    FILE* f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Unable to create output TGA image `%s'\n", filename);
        exit(EXIT_FAILURE);
    }

    const unsigned char header[18] = {
        0x00,                   /* ID Length, 0 => No ID */
        0x00,                   /* Color Map Type, 0 => No color map included */
        0x02,                   /* Image Type, 2 => Uncompressed, True-color Image */
        0x00, 0x00,             /* Color map: first entry index */
        0x00, 0x00,             /* Color map: length */
        0x00,                   /* Color map: entry size */
        0x00, 0x00,             /* X-origin of Image */
        0x00, 0x00,             /* Y-origin of Image */
        static_cast<unsigned char>(width & 0xff),          /* Image Width (low byte) */
        static_cast<unsigned char>((width >> 8) & 0xff),   /* Image Width (high byte) */
        static_cast<unsigned char>(height & 0xff),         /* Image Height (low byte) */
        static_cast<unsigned char>((height >> 8) & 0xff),  /* Image Height (high byte) */
        0x18,                   /* Pixel Depth, 0x18 => 24 Bits */
        0x20                    /* Image Descriptor */
    };
    fwrite(header, 1, sizeof(header), f);

    // Walk the rows from height-1 down to 0; the index is computed in size_t
    // so that large images do not wrap around in 32-bit arithmetic.
    for (unsigned int row = height; row-- > 0;) {
        for (size_t x = 0; x < width; x++) {
            const size_t i = (static_cast<size_t>(row) * width + x) * 3;
            fputc(data[i + 2], f); /* write blue */
            fputc(data[i + 1], f); /* write green */
            fputc(data[i], f);     /* write red */
        }
    }

    fclose(f);
}
|
||||
|
|
||||
// Reads the whole file at `path` in binary mode and returns its contents.
// Returns an empty string when the file cannot be opened.
std::string FileToString(const std::string& path) {
    std::ifstream input(path, std::ios::in | std::ios::binary);
    if (!input)
    {
        return std::string();
    }

    // Stream the entire file buffer into an in-memory string stream.
    std::ostringstream buffer;
    buffer << input.rdbuf();
    input.close();
    return buffer.str();
}
|
||||
|
|
||||
const char *getErrorString(cl_int error) |
|
||||
{ |
|
||||
switch (error) { |
|
||||
// run-time and JIT compiler errors
|
|
||||
case 0: return "CL_SUCCESS"; |
|
||||
case -1: return "CL_DEVICE_NOT_FOUND"; |
|
||||
case -2: return "CL_DEVICE_NOT_AVAILABLE"; |
|
||||
case -3: return "CL_COMPILER_NOT_AVAILABLE"; |
|
||||
case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; |
|
||||
case -5: return "CL_OUT_OF_RESOURCES"; |
|
||||
case -6: return "CL_OUT_OF_HOST_MEMORY"; |
|
||||
case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; |
|
||||
case -8: return "CL_MEM_COPY_OVERLAP"; |
|
||||
case -9: return "CL_IMAGE_FORMAT_MISMATCH"; |
|
||||
case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; |
|
||||
case -11: return "CL_BUILD_PROGRAM_FAILURE"; |
|
||||
case -12: return "CL_MAP_FAILURE"; |
|
||||
case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; |
|
||||
case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; |
|
||||
case -15: return "CL_COMPILE_PROGRAM_FAILURE"; |
|
||||
case -16: return "CL_LINKER_NOT_AVAILABLE"; |
|
||||
case -17: return "CL_LINK_PROGRAM_FAILURE"; |
|
||||
case -18: return "CL_DEVICE_PARTITION_FAILED"; |
|
||||
case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; |
|
||||
|
|
||||
// compile-time errors
|
|
||||
case -30: return "CL_INVALID_VALUE"; |
|
||||
case -31: return "CL_INVALID_DEVICE_TYPE"; |
|
||||
case -32: return "CL_INVALID_PLATFORM"; |
|
||||
case -33: return "CL_INVALID_DEVICE"; |
|
||||
case -34: return "CL_INVALID_CONTEXT"; |
|
||||
case -35: return "CL_INVALID_QUEUE_PROPERTIES"; |
|
||||
case -36: return "CL_INVALID_COMMAND_QUEUE"; |
|
||||
case -37: return "CL_INVALID_HOST_PTR"; |
|
||||
case -38: return "CL_INVALID_MEM_OBJECT"; |
|
||||
case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; |
|
||||
case -40: return "CL_INVALID_IMAGE_SIZE"; |
|
||||
case -41: return "CL_INVALID_SAMPLER"; |
|
||||
case -42: return "CL_INVALID_BINARY"; |
|
||||
case -43: return "CL_INVALID_BUILD_OPTIONS"; |
|
||||
case -44: return "CL_INVALID_PROGRAM"; |
|
||||
case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; |
|
||||
case -46: return "CL_INVALID_KERNEL_NAME"; |
|
||||
case -47: return "CL_INVALID_KERNEL_DEFINITION"; |
|
||||
case -48: return "CL_INVALID_KERNEL"; |
|
||||
case -49: return "CL_INVALID_ARG_INDEX"; |
|
||||
case -50: return "CL_INVALID_ARG_VALUE"; |
|
||||
case -51: return "CL_INVALID_ARG_SIZE"; |
|
||||
case -52: return "CL_INVALID_KERNEL_ARGS"; |
|
||||
case -53: return "CL_INVALID_WORK_DIMENSION"; |
|
||||
case -54: return "CL_INVALID_WORK_GROUP_SIZE"; |
|
||||
case -55: return "CL_INVALID_WORK_ITEM_SIZE"; |
|
||||
case -56: return "CL_INVALID_GLOBAL_OFFSET"; |
|
||||
case -57: return "CL_INVALID_EVENT_WAIT_LIST"; |
|
||||
case -58: return "CL_INVALID_EVENT"; |
|
||||
case -59: return "CL_INVALID_OPERATION"; |
|
||||
case -60: return "CL_INVALID_GL_OBJECT"; |
|
||||
case -61: return "CL_INVALID_BUFFER_SIZE"; |
|
||||
case -62: return "CL_INVALID_MIP_LEVEL"; |
|
||||
case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; |
|
||||
case -64: return "CL_INVALID_PROPERTY"; |
|
||||
case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; |
|
||||
case -66: return "CL_INVALID_COMPILER_OPTIONS"; |
|
||||
case -67: return "CL_INVALID_LINKER_OPTIONS"; |
|
||||
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; |
|
||||
|
|
||||
// extension errors
|
|
||||
case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; |
|
||||
case -1001: return "CL_PLATFORM_NOT_FOUND_KHR"; |
|
||||
case -1002: return "CL_INVALID_D3D10_DEVICE_KHR"; |
|
||||
case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR"; |
|
||||
case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"; |
|
||||
case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"; |
|
||||
default: return "Unknown OpenCL error"; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// Returns true when `err` is CL_SUCCESS. Otherwise prints the symbolic name
// of the error (via getErrorString) to stdout and returns false.
bool CheckCLError(cl_int err)
{
    const bool succeeded = (err == CL_SUCCESS);
    if (!succeeded)
    {
        std::cout << "OpenCL error: " << getErrorString(err) << std::endl;
    }
    return succeeded;
}
|
@ -0,0 +1,145 @@ |
|||||
|
#include "LinearTests.h" |
||||
|
#include <iostream> |
||||
|
|
||||
|
void Jacobi::generateLinEq() |
||||
|
{ |
||||
|
Jx_c[0] = new float[Jn]; |
||||
|
Jx_c[1] = new float[Jn]; |
||||
|
Jx_g[0] = new float[Jn]; |
||||
|
Jx_g[1] = new float[Jn]; |
||||
|
|
||||
|
for (int i = 0; i < Jn; ++i) { |
||||
|
Jx_c[0][i] = 0.0f; |
||||
|
Jx_c[1][i] = 0.0f; |
||||
|
Jx_g[0][i] = 0.0f; |
||||
|
Jx_g[1][i] = 0.0f; |
||||
|
} |
||||
|
|
||||
|
JA_c = new float[Jn * Jn]; |
||||
|
JA_g = new float[Jn * Jn]; |
||||
|
for (int i = 0; i < Jn; ++i) { |
||||
|
for (int j = 0; j < Jn; ++j) { |
||||
|
float v = 0.0f; |
||||
|
if (i == j) { |
||||
|
v = 0.5f; |
||||
|
} |
||||
|
JA_c[i + j * Jn] = v; |
||||
|
JA_g[i + j * Jn] = v; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
Jb_c = new float[Jn]; |
||||
|
Jb_g = new float[Jn]; |
||||
|
for (int i = 0; i < Jn; ++i) { |
||||
|
Jb_c[i] = 1.0f; |
||||
|
Jb_g[i] = 1.0f; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// CPU reference: y = A * x + b.
//   n, m : number of rows / columns of A
//   y    : output vector, n elements
//   A    : n x m matrix, element (i, j) at A[i * m + j]
//   x    : input vector, m elements
//   b    : additive vector, n elements
void Jacobi::cpuScalarMV(int n, int m, float* y, const float* A, const float* x, const float* b)
{
    for (int row = 0; row < n; ++row) {
        const float* rowOfA = A + row * m;
        // Start from b[row], then accumulate the dot product left to right.
        float acc = b[row];
        for (int col = 0; col < m; ++col) {
            acc += rowOfA[col] * x[col];
        }
        y[row] = acc;
    }
}
||||
|
|
||||
|
// Prints an n x m matrix to stdout, one row per line, with the elements of a
// row separated by ", ".
//   n, m : number of rows / columns
//   A    : matrix data, element (i, j) at A[i * m + j]
void Jacobi::printMatrix(int n, int m, float* A)
{
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < m; ++j) {
            // The row stride is the number of columns (m), not the number of rows.
            std::cout << A[j + i * m];
            if (j < m - 1) std::cout << ", ";
        }
        std::cout << std::endl;
    }
}
||||
|
|
||||
|
// Creates the matrix-vector multiplier selected by `type`, passing the OpenCL
// context, queue and program to its constructor. The object is allocated with
// `new`. Returns NULL for an unrecognized `type`.
MatrixVectorMultiplier* Jacobi::MethodFactory(MVType type, cl::Context* context, cl::CommandQueue* queue, cl::Program* program)
{
    switch (type) {
    case MVType::SimpleMV:
        return new Simple(context, queue, program);
    case MVType::ReduceMV:
        return new Reduce(context, queue, program);
    case MVType::LargeMV:
        return new Large(context, queue, program);
    default:
        return NULL;
    }
}
||||
|
|
||||
|
// Remembers which multiplier implementation to use and builds the test
// linear system.
Jacobi::Jacobi(MVType _type)
    : type(_type)
{
    generateLinEq();
}
||||
|
|
||||
|
// Currently a no-op: the body is empty and `queue` is unused.
void Jacobi::collect_results(cl::CommandQueue* queue)
{
    // Nothing to collect here.
}
||||
|
|
||||
|
// Runs 20 iterations of x_next = dewIt(A, x, b) using the multiplier chosen
// at construction time, alternating between the two GPU-side solution buffers.
// Before swapping, each iteration prints the buffer that served as its INPUT.
//
// Prints "Invalid factory parameter" and exits with status -1 when no
// multiplier can be created for the stored type. `Event` is not used.
//
// NOTE(review): the multiplier returned by MethodFactory is never deleted in
// this function.
void Jacobi::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
{
    MatrixVectorMultiplier* multiplier = MethodFactory(type, context, queue, program);

    if (multiplier == NULL) {
        std::cout << "Invalid factory parameter" << std::endl;
        exit(-1);
    }

    const int iterations = 20;
    int source = 0;
    for (int step = 0; step < iterations; ++step) {
        const int target = (source + 1) % 2;
        multiplier->dewIt(Jn, Jn, Jx_g[target], JA_g, Jx_g[source], Jb_g);
        printMatrix(1, Jn, Jx_g[source]);
        source = target; // the freshly written buffer is the next input
    }
}
||||
|
|
||||
|
void Jacobi::cpu_compute() |
||||
|
{ |
||||
|
int inputBuffer = 0; |
||||
|
const int iterations = 20; |
||||
|
for (int i = 0; i < iterations; ++i) { |
||||
|
cpuScalarMV(Jn, Jn, Jx_c[(inputBuffer + 1) % 2], JA_c, Jx_c[inputBuffer], Jb_c); |
||||
|
//printMatrix(1, Jn, Jx_c[inputBuffer + 1]);
|
||||
|
inputBuffer = (inputBuffer + 1) % 2; |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
bool Jacobi::validate_results() |
||||
|
{ |
||||
|
bool result = true; |
||||
|
// Actual validation
|
||||
|
//printMatrix(Jn, Jn, JA_c);
|
||||
|
//printMatrix(1, Jn, Jx_c[0]);
|
||||
|
//printMatrix(1, Jn, Jx_c[1]);
|
||||
|
|
||||
|
// Cleanup
|
||||
|
if (Jx_c[0] == 0) delete[] Jx_c[0]; |
||||
|
if (Jx_c[1] == 0) delete[] Jx_c[1]; |
||||
|
if (Jx_g[0] == 0) delete[] Jx_g[0]; |
||||
|
if (Jx_g[1] == 0) delete[] Jx_g[1]; |
||||
|
|
||||
|
if (JA_c == 0) delete[] JA_c; |
||||
|
if (Jb_c == 0) delete[] Jb_c; |
||||
|
if (JA_g == 0) delete[] JA_g; |
||||
|
if (Jb_g == 0) delete[] Jb_g; |
||||
|
|
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
// Returns the textual description of this test; currently always empty.
std::string Jacobi::description()
{
    return {};
}
@ -0,0 +1,11 @@ |
|||||
|
#include "LinearTests.h" |
||||
|
|
||||
|
// Stores the OpenCL handles in the inherited members so that they are not
// left uninitialized (same as Simple::Simple does).
Large::Large(cl::Context* context, cl::CommandQueue* queue, cl::Program* program)
{
    this->context = context;
    this->queue = queue;
    this->program = program;
    //TODO: Implement
}
||||
|
|
||||
|
// Not implemented yet: does nothing and leaves `y` untouched.
void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
{
    // TODO: Implement
}
@ -0,0 +1,62 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#include "Tests.h" |
||||
|
|
||||
|
|
||||
|
// Selects which matrix-vector multiplier implementation a Jacobi test uses.
enum class MVType {
    SimpleMV, // handled by class Simple
    ReduceMV, // handled by class Reduce
    LargeMV   // handled by class Large
};
||||
|
|
||||
|
class MatrixVectorMultiplier { |
||||
|
protected: |
||||
|
cl::Context* context; |
||||
|
cl::CommandQueue* queue; |
||||
|
cl::Program* program; |
||||
|
public: |
||||
|
virtual void dewIt(int n, int m, float* y, const float* A, const float* x, const float* b) = 0; |
||||
|
}; |
||||
|
|
||||
|
class Jacobi : public TestCase { |
||||
|
private: |
||||
|
const int Jn = 8; |
||||
|
// CPU
|
||||
|
float* Jx_c[2] = { NULL, NULL }; |
||||
|
float* JA_c = NULL; |
||||
|
float* Jb_c = NULL; |
||||
|
// GPU
|
||||
|
float* Jx_g[2] = { NULL, NULL }; |
||||
|
float* JA_g = NULL; |
||||
|
float* Jb_g = NULL; |
||||
|
|
||||
|
MVType type; |
||||
|
|
||||
|
void generateLinEq(); |
||||
|
void cpuScalarMV(int n, int m, float* y, const float* A, const float* x, const float* b); |
||||
|
void printMatrix(int n, int m, float* A); |
||||
|
MatrixVectorMultiplier* MethodFactory(MVType type, cl::Context* context, cl::CommandQueue* queue, cl::Program* program); |
||||
|
public: |
||||
|
Jacobi(MVType type); |
||||
|
void collect_results(cl::CommandQueue* queue); |
||||
|
void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); |
||||
|
void cpu_compute(); |
||||
|
bool validate_results(); |
||||
|
std::string description(); |
||||
|
}; |
||||
|
|
||||
|
class Simple : public MatrixVectorMultiplier { |
||||
|
public: |
||||
|
Simple(cl::Context* context, cl::CommandQueue* queue, cl::Program* program); |
||||
|
void dewIt(int n, int m, float* y, const float* A, const float* x, const float* b); |
||||
|
}; |
||||
|
|
||||
|
class Reduce : public MatrixVectorMultiplier { |
||||
|
public: |
||||
|
Reduce(cl::Context* context, cl::CommandQueue* queue, cl::Program* program); |
||||
|
void dewIt(int n, int m, float* y, const float* A, const float* x, const float* b); |
||||
|
}; |
||||
|
|
||||
|
class Large : public MatrixVectorMultiplier { |
||||
|
public: |
||||
|
Large(cl::Context* context, cl::CommandQueue* queue, cl::Program* program); |
||||
|
void dewIt(int n, int m, float* y, const float* A, const float* x, const float* b); |
||||
|
}; |
@ -0,0 +1,11 @@ |
|||||
|
#include "LinearTests.h" |
||||
|
|
||||
|
// Stores the OpenCL handles in the inherited members so that they are not
// left uninitialized (same as Simple::Simple does).
Reduce::Reduce(cl::Context* context, cl::CommandQueue* queue, cl::Program* program)
{
    this->context = context;
    this->queue = queue;
    this->program = program;
    //TODO: Implement
}
||||
|
|
||||
|
// Not implemented yet: does nothing and leaves `y` untouched.
void Reduce::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
{
    // TODO: Implement
}
@ -0,0 +1,51 @@ |
|||||
|
#include "LinearTests.h" |
||||
|
#include <Common.h> |
||||
|
|
||||
|
// Keeps the (non-owned) OpenCL context, command queue and program for later
// use by dewIt().
Simple::Simple(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
{
    this->context = _context;
    this->queue = _queue;
    this->program = _program;
}
||||
|
|
||||
|
void Simple::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b) |
||||
|
{ |
||||
|
cl_int err = CL_SUCCESS; |
||||
|
cl::Event _event; |
||||
|
|
||||
|
cl::Kernel kernel = cl::Kernel(*program, "simpleMV", &err); |
||||
|
CheckCLError(err); |
||||
|
|
||||
|
cl::Buffer ABuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n *m, NULL, &err); |
||||
|
CheckCLError(err); |
||||
|
queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n *m, A); |
||||
|
|
||||
|
cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err); // Ot kell kiirogatni
|
||||
|
CheckCLError(err); |
||||
|
queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x); |
||||
|
|
||||
|
cl::Buffer YBuffer(*context, CL_MEM_WRITE_ONLY, sizeof(float) * n, NULL, &err); |
||||
|
CheckCLError(err); |
||||
|
queue->enqueueWriteBuffer(YBuffer, true, 0, sizeof(float) * n, y); |
||||
|
|
||||
|
cl::Buffer BBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n, NULL, &err); |
||||
|
CheckCLError(err); |
||||
|
queue->enqueueWriteBuffer(BBuffer, true, 0, sizeof(float) * n, b); |
||||
|
|
||||
|
kernel.setArg(0, n); |
||||
|
kernel.setArg(1, m); |
||||
|
kernel.setArg(2, YBuffer); |
||||
|
kernel.setArg(3, ABuffer); |
||||
|
kernel.setArg(4, XBuffer); |
||||
|
kernel.setArg(5, BBuffer); |
||||
|
|
||||
|
queue->enqueueNDRangeKernel(kernel, |
||||
|
cl::NullRange, // Indexek nem eloffszetelve
|
||||
|
cl::NDRange(n, 1), // Minden elemet egy szál
|
||||
|
cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
|
||||
|
NULL, //
|
||||
|
&_event); |
||||
|
_event.wait(); |
||||
|
|
||||
|
queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(int) * n, y); |
||||
|
} |
File diff suppressed because it is too large
Loading…
Reference in new issue