BMEVIIIMB01
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

187 lines
5.5 KiB

// Primitives.cpp : Defines the entry point for the console application.
#include <string>
#include <vector>
#include "Common.h"
// OpenCL C API
#include <CL/opencl.h>
// OpenCL C++ API
#include "cl.hpp"
// Number of float elements each demo generates on the host; it is also used
// as the element count of the device buffers and as the global work size.
const size_t dataSize = 4096;
void capi()
{
// Get a platform ID
cl_platform_id platformID;
clGetPlatformIDs(1, &platformID, NULL);
// Get a device ID
cl_device_id deviceID;
clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL);
// Create a context
cl_context context;
cl_context_properties contextProperties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 };
context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL);
// Create a command queue
cl_command_queue queue;
queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL);
// Create an OpenCL program
std::string source = FileToString("../kernels/programs.cl");
const char* csource = source.c_str();
cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL);
cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
cl_uint logLength;
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength);
char* log = new char[logLength];
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0);
std::cout << log << std::endl;
delete[] log;
exit(-1);
}
// Get the kernel handle
cl_kernel kernel = clCreateKernel(program, "square", &err);
if(!CheckCLError(err)) exit(-1);
// Allocate and upload the input data
std::vector<float> hostBuffer;
for (size_t index = 0; index < dataSize; ++index)
{
hostBuffer.push_back(static_cast<float>(index));
}
cl_mem inputBuffer;
inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
if (!CheckCLError(err)) exit(-1);
clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL);
// Alocate output data
cl_mem outputBuffer;
outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * dataSize, NULL, &err);
if (!CheckCLError(err)) exit(-1);
// Set the kernel paramateres
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
// Enqueue the kernel
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL, NULL);
// Copy the result back to the host
clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL);
// Validate the output
for (size_t index = 0; index < dataSize; ++index)
{
if (hostBuffer[index] != index*index)
{
std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
break;
}
}
std::cout << "Finished" << std::endl;
}
void cppapi()
{
cl_int err = CL_SUCCESS;
// Get a platform ID
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.size() == 0)
{
std::cout << "Unable to find suitable platform." << std::endl;
exit(-1);
}
std::cout << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
// Create a context
cl_context_properties properties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 };
cl::Context context(CL_DEVICE_TYPE_GPU, properties);
// Enumerate the devices
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
std::cout << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
// Create the command queue
cl::Event event;
cl::CommandQueue queue(context, devices[0], 0, &err);
// Create the OpenCL program
std::string programSource = FileToString("../kernels/programs.cl");
cl::Program program = cl::Program(context, programSource);
program.build(devices);
// Get the kernel handle
cl::Kernel kernel(program, "histogram_global", &err);
CheckCLError(err);
// Allocate and upload the input data
std::vector<float> hostBuffer;
for (size_t index = 0; index < dataSize; ++index)
{
hostBuffer.push_back(static_cast<float>(index % 32));
}
cl::Buffer clInputBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
queue.enqueueWriteBuffer(clInputBuffer,
true, // Blocking!
0, sizeof(float) * dataSize, hostBuffer.data());
// Allocate the output data
cl::Buffer clResultBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 32, NULL, &err);
// Set the kernel parameters
kernel.setArg(0, clInputBuffer); // kernel FV param�terei sorrendben
kernel.setArg(1, clResultBuffer);
// Enqueue the kernel
queue.enqueueNDRangeKernel(kernel,
cl::NullRange, // Indexek nem eloffszetelve
cl::NDRange(dataSize, 1), // Minden elemet egy sz�l
cl::NullRange, // Workgroup m�ret? - ez az auto, ha nem indul, 1024-re, onnan cs�kkent, amig elindul
NULL, //
&event); // � jlezi hogy v�ge, lsd lent
// Create reference values
for (size_t index = 0; index < dataSize; ++index) {
}
event.wait();
// Copy result back to host
queue.enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * 32, hostBuffer.data());
// Validate the result
for (size_t index = 0; index < 32; ++index)
{
if (hostBuffer[index] != index*index)
{
std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
break;
}
}
for (size_t index = 0; index < 32; ++index) {
std::cout << index << ": " << hostBuffer[index] << std::endl;
}
std::cout << "Finished" << std::endl;
}
// Program entry point. Only the C++-wrapper demo is executed; the C API demo
// is kept available but switched off.
int main()
{
    // capi();   // C API variant (disabled)
    cppapi();    // C++ wrapper variant

    return 0;
}