Browse Source

Reformat test framework

master
trinitas 3 years ago
parent
commit
2c7fc739ba
  1. 204
      Primitives/Common.cpp
  2. 205
      Primitives/Common.h
  3. 51
      Primitives/OpenCLHandler.cpp
  4. 13
      Primitives/OpenCLHandler.h
  5. 305
      Primitives/Primitives.cpp
  6. 5
      Primitives/Primitives.vcxproj
  7. 15
      Primitives/Primitives.vcxproj.filters
  8. 61
      Primitives/Square.cpp
  9. 36
      Primitives/Tests.h

204
Primitives/Common.cpp

@ -0,0 +1,204 @@
#include "Common.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <sstream>
#include "cl.hpp"
#pragma warning( disable : 4996 )
void printTimeStats(cl_event event)
{
cl_int err = CL_SUCCESS;
if(event == NULL)
{
std::cerr << "No event object returned!" << std::endl;
}
else
{
clWaitForEvents(1, &event);
}
cl_ulong execStart, execEnd;
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &execStart, NULL);
if(err != CL_SUCCESS)
{
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" << err << "]." << std::endl;
}
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &execEnd, NULL);
if(err != CL_SUCCESS)
{
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" << err << "]." << std::endl;
}
std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl;
}
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height)
{
FILE *f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Unable to create output TGA image `%s'\n", filename);
exit(EXIT_FAILURE);
}
fputc(0x00, f); /* ID Length, 0 => No ID */
fputc(0x00, f); /* Color Map Type, 0 => No color map included */
fputc(0x02, f); /* Image Type, 2 => Uncompressed, True-color Image */
fputc(0x00, f); /* Next five bytes are about the color map entries */
fputc(0x00, f); /* 2 bytes Index, 2 bytes length, 1 byte size */
fputc(0x00, f);
fputc(0x00, f);
fputc(0x00, f);
fputc(0x00, f); /* X-origin of Image */
fputc(0x00, f);
fputc(0x00, f); /* Y-origin of Image */
fputc(0x00, f);
fputc(width & 0xff, f); /* Image Width */
fputc((width >> 8) & 0xff, f);
fputc(height & 0xff, f); /* Image Height */
fputc((height >> 8) & 0xff, f);
fputc(0x18, f); /* Pixel Depth, 0x18 => 24 Bits */
fputc(0x20, f); /* Image Descriptor */
for (int y = height - 1; y >= 0; y--) {
for (size_t x = 0; x < width; x++) {
const size_t i = (y * width + x) * 3;
fputc(data[i + 2], f); /* write blue */
fputc(data[i + 1], f); /* write green */
fputc(data[i], f); /* write red */
}
}
}
std::string FileToString(const std::string& path) {
std::ifstream file(path, std::ios::in | std::ios::binary);
if (file)
{
std::ostringstream contents;
contents << file.rdbuf();
file.close();
return(contents.str());
}
return std::string();
}
const char *getErrorString(cl_int error)
{
switch (error) {
// run-time and JIT compiler errors
case 0: return "CL_SUCCESS";
case -1: return "CL_DEVICE_NOT_FOUND";
case -2: return "CL_DEVICE_NOT_AVAILABLE";
case -3: return "CL_COMPILER_NOT_AVAILABLE";
case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
case -5: return "CL_OUT_OF_RESOURCES";
case -6: return "CL_OUT_OF_HOST_MEMORY";
case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
case -8: return "CL_MEM_COPY_OVERLAP";
case -9: return "CL_IMAGE_FORMAT_MISMATCH";
case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
case -11: return "CL_BUILD_PROGRAM_FAILURE";
case -12: return "CL_MAP_FAILURE";
case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
case -15: return "CL_COMPILE_PROGRAM_FAILURE";
case -16: return "CL_LINKER_NOT_AVAILABLE";
case -17: return "CL_LINK_PROGRAM_FAILURE";
case -18: return "CL_DEVICE_PARTITION_FAILED";
case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
// compile-time errors
case -30: return "CL_INVALID_VALUE";
case -31: return "CL_INVALID_DEVICE_TYPE";
case -32: return "CL_INVALID_PLATFORM";
case -33: return "CL_INVALID_DEVICE";
case -34: return "CL_INVALID_CONTEXT";
case -35: return "CL_INVALID_QUEUE_PROPERTIES";
case -36: return "CL_INVALID_COMMAND_QUEUE";
case -37: return "CL_INVALID_HOST_PTR";
case -38: return "CL_INVALID_MEM_OBJECT";
case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
case -40: return "CL_INVALID_IMAGE_SIZE";
case -41: return "CL_INVALID_SAMPLER";
case -42: return "CL_INVALID_BINARY";
case -43: return "CL_INVALID_BUILD_OPTIONS";
case -44: return "CL_INVALID_PROGRAM";
case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
case -46: return "CL_INVALID_KERNEL_NAME";
case -47: return "CL_INVALID_KERNEL_DEFINITION";
case -48: return "CL_INVALID_KERNEL";
case -49: return "CL_INVALID_ARG_INDEX";
case -50: return "CL_INVALID_ARG_VALUE";
case -51: return "CL_INVALID_ARG_SIZE";
case -52: return "CL_INVALID_KERNEL_ARGS";
case -53: return "CL_INVALID_WORK_DIMENSION";
case -54: return "CL_INVALID_WORK_GROUP_SIZE";
case -55: return "CL_INVALID_WORK_ITEM_SIZE";
case -56: return "CL_INVALID_GLOBAL_OFFSET";
case -57: return "CL_INVALID_EVENT_WAIT_LIST";
case -58: return "CL_INVALID_EVENT";
case -59: return "CL_INVALID_OPERATION";
case -60: return "CL_INVALID_GL_OBJECT";
case -61: return "CL_INVALID_BUFFER_SIZE";
case -62: return "CL_INVALID_MIP_LEVEL";
case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
case -64: return "CL_INVALID_PROPERTY";
case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
case -66: return "CL_INVALID_COMPILER_OPTIONS";
case -67: return "CL_INVALID_LINKER_OPTIONS";
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
// extension errors
case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
default: return "Unknown OpenCL error";
}
}
bool CheckCLError(cl_int err)
{
if(err != CL_SUCCESS)
{
std::cout << "OpenCL error: " << getErrorString(err) << std::endl;
return false;
}
return true;
}

205
Primitives/Common.h

@ -1,203 +1,8 @@
#pragma once
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <sstream>
#include "cl.hpp"
#pragma warning( disable : 4996 )
void printTimeStats(cl_event event)
{
cl_int err = CL_SUCCESS;
if(event == NULL)
{
std::cerr << "No event object returned!" << std::endl;
}
else
{
clWaitForEvents(1, &event);
}
cl_ulong execStart, execEnd;
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &execStart, NULL);
if(err != CL_SUCCESS)
{
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" << err << "]." << std::endl;
}
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &execEnd, NULL);
if(err != CL_SUCCESS)
{
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" << err << "]." << std::endl;
}
std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl;
}
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height)
{
FILE *f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Unable to create output TGA image `%s'\n", filename);
exit(EXIT_FAILURE);
}
fputc(0x00, f); /* ID Length, 0 => No ID */
fputc(0x00, f); /* Color Map Type, 0 => No color map included */
fputc(0x02, f); /* Image Type, 2 => Uncompressed, True-color Image */
fputc(0x00, f); /* Next five bytes are about the color map entries */
fputc(0x00, f); /* 2 bytes Index, 2 bytes length, 1 byte size */
fputc(0x00, f);
fputc(0x00, f);
fputc(0x00, f);
fputc(0x00, f); /* X-origin of Image */
fputc(0x00, f);
fputc(0x00, f); /* Y-origin of Image */
fputc(0x00, f);
fputc(width & 0xff, f); /* Image Width */
fputc((width >> 8) & 0xff, f);
fputc(height & 0xff, f); /* Image Height */
fputc((height >> 8) & 0xff, f);
fputc(0x18, f); /* Pixel Depth, 0x18 => 24 Bits */
fputc(0x20, f); /* Image Descriptor */
for (int y = height - 1; y >= 0; y--) {
for (size_t x = 0; x < width; x++) {
const size_t i = (y * width + x) * 3;
fputc(data[i + 2], f); /* write blue */
fputc(data[i + 1], f); /* write green */
fputc(data[i], f); /* write red */
}
}
}
std::string FileToString(const std::string& path) {
std::ifstream file(path, std::ios::in | std::ios::binary);
if (file)
{
std::ostringstream contents;
contents << file.rdbuf();
file.close();
return(contents.str());
}
return std::string();
}
const char *getErrorString(cl_int error)
{
switch (error) {
// run-time and JIT compiler errors
case 0: return "CL_SUCCESS";
case -1: return "CL_DEVICE_NOT_FOUND";
case -2: return "CL_DEVICE_NOT_AVAILABLE";
case -3: return "CL_COMPILER_NOT_AVAILABLE";
case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
case -5: return "CL_OUT_OF_RESOURCES";
case -6: return "CL_OUT_OF_HOST_MEMORY";
case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
case -8: return "CL_MEM_COPY_OVERLAP";
case -9: return "CL_IMAGE_FORMAT_MISMATCH";
case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
case -11: return "CL_BUILD_PROGRAM_FAILURE";
case -12: return "CL_MAP_FAILURE";
case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
case -15: return "CL_COMPILE_PROGRAM_FAILURE";
case -16: return "CL_LINKER_NOT_AVAILABLE";
case -17: return "CL_LINK_PROGRAM_FAILURE";
case -18: return "CL_DEVICE_PARTITION_FAILED";
case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
// compile-time errors
case -30: return "CL_INVALID_VALUE";
case -31: return "CL_INVALID_DEVICE_TYPE";
case -32: return "CL_INVALID_PLATFORM";
case -33: return "CL_INVALID_DEVICE";
case -34: return "CL_INVALID_CONTEXT";
case -35: return "CL_INVALID_QUEUE_PROPERTIES";
case -36: return "CL_INVALID_COMMAND_QUEUE";
case -37: return "CL_INVALID_HOST_PTR";
case -38: return "CL_INVALID_MEM_OBJECT";
case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
case -40: return "CL_INVALID_IMAGE_SIZE";
case -41: return "CL_INVALID_SAMPLER";
case -42: return "CL_INVALID_BINARY";
case -43: return "CL_INVALID_BUILD_OPTIONS";
case -44: return "CL_INVALID_PROGRAM";
case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
case -46: return "CL_INVALID_KERNEL_NAME";
case -47: return "CL_INVALID_KERNEL_DEFINITION";
case -48: return "CL_INVALID_KERNEL";
case -49: return "CL_INVALID_ARG_INDEX";
case -50: return "CL_INVALID_ARG_VALUE";
case -51: return "CL_INVALID_ARG_SIZE";
case -52: return "CL_INVALID_KERNEL_ARGS";
case -53: return "CL_INVALID_WORK_DIMENSION";
case -54: return "CL_INVALID_WORK_GROUP_SIZE";
case -55: return "CL_INVALID_WORK_ITEM_SIZE";
case -56: return "CL_INVALID_GLOBAL_OFFSET";
case -57: return "CL_INVALID_EVENT_WAIT_LIST";
case -58: return "CL_INVALID_EVENT";
case -59: return "CL_INVALID_OPERATION";
case -60: return "CL_INVALID_GL_OBJECT";
case -61: return "CL_INVALID_BUFFER_SIZE";
case -62: return "CL_INVALID_MIP_LEVEL";
case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
case -64: return "CL_INVALID_PROPERTY";
case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
case -66: return "CL_INVALID_COMPILER_OPTIONS";
case -67: return "CL_INVALID_LINKER_OPTIONS";
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
// extension errors
case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
default: return "Unknown OpenCL error";
}
}
bool CheckCLError(cl_int err)
{
if(err != CL_SUCCESS)
{
std::cout << "OpenCL error: " << getErrorString(err) << std::endl;
return false;
}
return true;
}
void printTimeStats(cl_event event);
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height);
std::string FileToString(const std::string& path);
const char* getErrorString(cl_int error);
bool CheckCLError(cl_int err);

51
Primitives/OpenCLHandler.cpp

@ -0,0 +1,51 @@
#include "Common.h"
#include "OpenCLHandler.h"
#include <iostream>
OpenCLHandler::OpenCLHandler()
{
cl_int err = CL_SUCCESS;
// Get a platform ID
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.size() == 0)
{
std::cout << "Unable to find suitable platform." << std::endl;
exit(-1);
}
std::cout << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
// Create a context
cl_context_properties properties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 };
context = cl::Context(CL_DEVICE_TYPE_GPU, properties);
// Enumerate the devices
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
std::cout << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
// Create the command queue
cl::Event event;
queue = cl::CommandQueue(context, devices[0], 0, &err);
// Create the OpenCL program
std::string programSource = FileToString("../kernels/programs.cl");
program = cl::Program(context, programSource);
program.build(devices);
}
bool OpenCLHandler::run_test(TestCase* test)
{
cl::Event event;
test->gpu_compute(&context, &queue, &program, &event);
test->cpu_compute();
event.wait();
test->collect_results(&queue);
return test->validate_results();
}

13
Primitives/OpenCLHandler.h

@ -0,0 +1,13 @@
#pragma once
#include "Tests.h"
class OpenCLHandler {
private:
cl::Program program;
cl::Context context;
cl::CommandQueue queue;
public:
OpenCLHandler();
bool run_test(TestCase* test);
};

305
Primitives/Primitives.cpp

@ -2,7 +2,10 @@
#include <string>
#include <vector>
#include <iostream>
#include "Common.h"
#include "OpenCLHandler.h"
#include "Tests.h"
// OpenCL C API
#include <CL/opencl.h>
@ -10,178 +13,150 @@
// OpenCL C++ API
#include "cl.hpp"
const size_t dataSize = 4096;
//const size_t dataSize = 4096;
//
//void capi()
//{
// // Get a platform ID
// cl_platform_id platformID;
// clGetPlatformIDs(1, &platformID, NULL);
//
// // Get a device ID
// cl_device_id deviceID;
// clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL);
//
// // Create a context
// cl_context context;
// cl_context_properties contextProperties[] =
// { CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 };
// context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL);
//
// // Create a command queue
// cl_command_queue queue;
// queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL);
//
// // Create an OpenCL program
// std::string source = FileToString("../kernels/programs.cl");
// const char* csource = source.c_str();
// cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL);
// cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
// if (err != CL_SUCCESS)
// {
// cl_uint logLength;
// clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength);
// char* log = new char[logLength];
// clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0);
// std::cout << log << std::endl;
// delete[] log;
// exit(-1);
// }
//}
//void cppapi()
//{
// cl_int err = CL_SUCCESS;
//
// // Get a platform ID
// std::vector<cl::Platform> platforms;
// cl::Platform::get(&platforms);
// if (platforms.size() == 0)
// {
// std::cout << "Unable to find suitable platform." << std::endl;
// exit(-1);
// }
//
// std::cout << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
//
// // Create a context
// cl_context_properties properties[] =
// { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 };
// cl::Context context(CL_DEVICE_TYPE_GPU, properties);
//
// // Enumerate the devices
// std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
// std::cout << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
//
// // Create the command queue
// cl::Event event;
// cl::CommandQueue queue(context, devices[0], 0, &err);
//
// // Create the OpenCL program
// std::string programSource = FileToString("../kernels/programs.cl");
// cl::Program program = cl::Program(context, programSource);
// program.build(devices);
//
//
// // Get the kernel handle
// cl::Kernel kernel(program, "histogram_global", &err);
// CheckCLError(err);
//
// // Allocate and upload the input data
// std::vector<float> hostBuffer;
// for (size_t index = 0; index < dataSize; ++index)
// {
// hostBuffer.push_back(static_cast<float>(index % 32));
// }
//
// cl::Buffer clInputBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
// queue.enqueueWriteBuffer(clInputBuffer,
// true, // Blocking!
// 0, sizeof(float) * dataSize, hostBuffer.data());
//
// // Allocate the output data
// cl::Buffer clResultBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 32, NULL, &err);
//
// // Set the kernel parameters
// kernel.setArg(0, clInputBuffer); // kernel FV paraméterei sorrendben
// kernel.setArg(1, clResultBuffer);
//
// // Enqueue the kernel
// queue.enqueueNDRangeKernel(kernel,
// cl::NullRange, // Indexek nem eloffszetelve
// cl::NDRange(dataSize, 1), // Minden elemet egy szál
// cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
// NULL, //
// &event); // Õ jlezi hogy vége, lsd lent
//
// // Create reference values
// for (size_t index = 0; index < dataSize; ++index) {
// }
// event.wait();
//
// // Copy result back to host
// queue.enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * 32, hostBuffer.data());
//
//// Validate the result
// for (size_t index = 0; index < 32; ++index)
// {
// if (hostBuffer[index] != index*index)
// {
// std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
// break;
// }
// }
// for (size_t index = 0; index < 32; ++index) {
// std::cout << index << ": " << hostBuffer[index] << std::endl;
// }
// std::cout << "Finished" << std::endl;
//}
void capi()
{
// Get a platform ID
cl_platform_id platformID;
clGetPlatformIDs(1, &platformID, NULL);
// Get a device ID
cl_device_id deviceID;
clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL);
// Create a context
cl_context context;
cl_context_properties contextProperties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 };
context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL);
// Create a command queue
cl_command_queue queue;
queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL);
// Create an OpenCL program
std::string source = FileToString("../kernels/programs.cl");
const char* csource = source.c_str();
cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL);
cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
cl_uint logLength;
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength);
char* log = new char[logLength];
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0);
std::cout << log << std::endl;
delete[] log;
exit(-1);
}
// Get the kernel handle
cl_kernel kernel = clCreateKernel(program, "square", &err);
if(!CheckCLError(err)) exit(-1);
// Allocate and upload the input data
std::vector<float> hostBuffer;
for (size_t index = 0; index < dataSize; ++index)
{
hostBuffer.push_back(static_cast<float>(index));
}
cl_mem inputBuffer;
inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
if (!CheckCLError(err)) exit(-1);
clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL);
// Alocate output data
cl_mem outputBuffer;
outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * dataSize, NULL, &err);
if (!CheckCLError(err)) exit(-1);
// Set the kernel paramateres
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
// Enqueue the kernel
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL, NULL);
// Copy the result back to the host
clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL);
// Validate the output
for (size_t index = 0; index < dataSize; ++index)
{
if (hostBuffer[index] != index*index)
{
std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
break;
}
}
std::cout << "Finished" << std::endl;
}
void cppapi()
int main()
{
cl_int err = CL_SUCCESS;
// Get a platform ID
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.size() == 0)
{
std::cout << "Unable to find suitable platform." << std::endl;
exit(-1);
}
std::cout << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
// Create a context
cl_context_properties properties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 };
cl::Context context(CL_DEVICE_TYPE_GPU, properties);
// Enumerate the devices
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
std::cout << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
// Create the command queue
cl::Event event;
cl::CommandQueue queue(context, devices[0], 0, &err);
// Create the OpenCL program
std::string programSource = FileToString("../kernels/programs.cl");
cl::Program program = cl::Program(context, programSource);
program.build(devices);
// Get the kernel handle
cl::Kernel kernel(program, "histogram_global", &err);
CheckCLError(err);
// Allocate and upload the input data
std::vector<float> hostBuffer;
for (size_t index = 0; index < dataSize; ++index)
{
hostBuffer.push_back(static_cast<float>(index % 32));
}
cl::Buffer clInputBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
queue.enqueueWriteBuffer(clInputBuffer,
true, // Blocking!
0, sizeof(float) * dataSize, hostBuffer.data());
// Allocate the output data
cl::Buffer clResultBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 32, NULL, &err);
// Set the kernel parameters
kernel.setArg(0, clInputBuffer); // kernel FV paraméterei sorrendben
kernel.setArg(1, clResultBuffer);
//capi();
//cppapi();
OpenCLHandler handler;
std::vector<TestCase*> tests;
// Enqueue the kernel
queue.enqueueNDRangeKernel(kernel,
cl::NullRange, // Indexek nem eloffszetelve
cl::NDRange(dataSize, 1), // Minden elemet egy szál
cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
NULL, //
&event); // Õ jlezi hogy vége, lsd lent
Square s;
TestCase* t;
t = &s;
// Create reference values
for (size_t index = 0; index < dataSize; ++index) {
}
event.wait();
tests.push_back(t);
// Copy result back to host
queue.enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * 32, hostBuffer.data());
// Validate the result
for (size_t index = 0; index < 32; ++index)
{
if (hostBuffer[index] != index*index)
{
std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
break;
}
for (size_t i = 0; i < tests.size(); i++) {
handler.run_test(tests[i]);
}
for (size_t index = 0; index < 32; ++index) {
std::cout << index << ": " << hostBuffer[index] << std::endl;
}
std::cout << "Finished" << std::endl;
}
int main()
{
//capi();
cppapi();
return 0;
}

5
Primitives/Primitives.vcxproj

@ -72,10 +72,15 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="cl.hpp" />
<ClCompile Include="Common.cpp" />
<ClInclude Include="Common.h" />
<ClInclude Include="OpenCLHandler.h" />
<ClInclude Include="Tests.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="OpenCLHandler.cpp" />
<ClCompile Include="Primitives.cpp" />
<ClCompile Include="Square.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="..\kernels\programs.cl" />

15
Primitives/Primitives.vcxproj.filters

@ -21,6 +21,12 @@
<ClInclude Include="cl.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Tests.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="OpenCLHandler.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Common.h">
<Filter>Header Files</Filter>
</ClInclude>
@ -29,6 +35,15 @@
<ClCompile Include="Primitives.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Square.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="OpenCLHandler.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Common.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\kernels\programs.cl">

61
Primitives/Square.cpp

@ -0,0 +1,61 @@
#include <iostream>
#include "Common.h"
#include "Tests.h"
Square::Square() {
for (size_t index = 0; index < data_size; ++index)
{
gpuHostBuffer.push_back(static_cast<float>(index));
sourceData.push_back(static_cast<float>(index));
}
}
void Square::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) {
cl_int err = CL_SUCCESS;
// Get the kernel handle
cl::Kernel kernel(*program, "square", &err);
CheckCLError(err);
clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(float) * data_size, NULL, &err);
queue->enqueueWriteBuffer(clInputBuffer,
true, // Blocking!
0, sizeof(float) * data_size, gpuHostBuffer.data());
// Allocate the output data
clResultBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, &err);
// Set the kernel parameters
kernel.setArg(0, clInputBuffer); // kernel FV paraméterei sorrendben
kernel.setArg(1, clResultBuffer);
// Enqueue the kernel
queue->enqueueNDRangeKernel(kernel,
cl::NullRange, // Indexek nem eloffszetelve
cl::NDRange(data_size, 1), // Minden elemet egy szál
cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
NULL, //
Event); // Õ jlezi hogy vége, lsd lent
}
void Square::cpu_compute()
{
for (size_t index = 0; index < data_size; index++) {
cpuResult.push_back(sourceData[index] * sourceData[index]);
}
}
void Square::collect_results(cl::CommandQueue* queue) {
queue->enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * data_size, gpuHostBuffer.data());
}
bool Square::validate_results() {
for (size_t index = 0; index < data_size; index++) {
if (cpuResult[index] != gpuHostBuffer[index]) {
std::cout << "Wrong result at [" << index << "]: " << gpuHostBuffer[index] << "!=" << cpuResult[index] << std::endl;
return false;
}
}
std::cout << "Test \"Square\" completed." << std::endl;
return true;
}

36
Primitives/Tests.h

@ -0,0 +1,36 @@
#pragma once
#include <string>
#include "cl.hpp"
class TestCase {
private:
std::string kernelCode;
public:
virtual void collect_results(cl::CommandQueue* queue) = 0;
virtual void gpu_compute(
cl::Context* context,
cl::CommandQueue* queue,
cl::Program* program,
cl::Event* Event
) = 0;
virtual void cpu_compute() = 0;
virtual bool validate_results() = 0;
};
class Square : public TestCase {
private:
size_t data_size = 4096;
std::vector<float> gpuHostBuffer;
std::vector<float> sourceData;
std::vector<float> cpuResult;
cl::Buffer clInputBuffer;
cl::Buffer clResultBuffer;
public:
Square();
void collect_results(cl::CommandQueue* queue);
void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event);
void cpu_compute();
bool validate_results();
};
Loading…
Cancel
Save