// Primitives.cpp : Defines the entry point for the console application. #include #include #include "Common.h" // OpenCL C API #include // OpenCL C++ API #include "cl.hpp" const size_t dataSize = 4096; void capi() { // Get a platform ID cl_platform_id platformID; clGetPlatformIDs(1, &platformID, NULL); // Get a device ID cl_device_id deviceID; clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL); // Create a context cl_context context; cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 }; context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL); // Create a command queue cl_command_queue queue; queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL); // Create an OpenCL program std::string source = FileToString("../kernels/programs.cl"); const char* csource = source.c_str(); cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL); cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); if (err != CL_SUCCESS) { cl_uint logLength; clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength); char* log = new char[logLength]; clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0); std::cout << log << std::endl; delete[] log; exit(-1); } // Get the kernel handle cl_kernel kernel = clCreateKernel(program, "square", &err); if(!CheckCLError(err)) exit(-1); // Allocate and upload the input data std::vector hostBuffer; for (size_t index = 0; index < dataSize; ++index) { hostBuffer.push_back(static_cast(index)); } cl_mem inputBuffer; inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err); if (!CheckCLError(err)) exit(-1); clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL); // Alocate output data cl_mem outputBuffer; outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * dataSize, NULL, &err); if (!CheckCLError(err)) exit(-1); // Set the kernel paramateres clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer); clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer); // Enqueue the kernel clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL, NULL); // Copy the result back to the host clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL); // Validate the output for (size_t index = 0; index < dataSize; ++index) { if (hostBuffer[index] != index*index) { std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl; break; } } std::cout << "Finished" << std::endl; } void cppapi() { cl_int err = CL_SUCCESS; // Get a platform ID std::vector platforms; cl::Platform::get(&platforms); if (platforms.size() == 0) { std::cout << "Unable to find suitable platform." << std::endl; exit(-1); } std::cout << platforms[0].getInfo() << std::endl; // Create a context cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; cl::Context context(CL_DEVICE_TYPE_GPU, properties); // Enumerate the devices std::vector devices = context.getInfo(); std::cout << devices[0].getInfo() << std::endl; // Create the command queue cl::Event event; cl::CommandQueue queue(context, devices[0], 0, &err); // Create the OpenCL program std::string programSource = FileToString("../kernels/programs.cl"); cl::Program program = cl::Program(context, programSource); program.build(devices); // Get the kernel handle cl::Kernel kernel(program, "histogram_global", &err); CheckCLError(err); // Allocate and upload the input data std::vector hostBuffer; for (size_t index = 0; index < dataSize; ++index) { hostBuffer.push_back(static_cast(index % 32)); } cl::Buffer clInputBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err); queue.enqueueWriteBuffer(clInputBuffer, true, // Blocking! 0, sizeof(float) * dataSize, hostBuffer.data()); // Allocate the output data cl::Buffer clResultBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 32, NULL, &err); // Set the kernel parameters kernel.setArg(0, clInputBuffer); // kernel FV paraméterei sorrendben kernel.setArg(1, clResultBuffer); // Enqueue the kernel queue.enqueueNDRangeKernel(kernel, cl::NullRange, // Indexek nem eloffszetelve cl::NDRange(dataSize, 1), // Minden elemet egy szál cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul NULL, // &event); // Ő jlezi hogy vége, lsd lent // Create reference values for (size_t index = 0; index < dataSize; ++index) { } event.wait(); // Copy result back to host queue.enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * 32, hostBuffer.data()); // Validate the result for (size_t index = 0; index < 32; ++index) { if (hostBuffer[index] != index*index) { std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl; break; } } for (size_t index = 0; index < 32; ++index) { std::cout << index << ": " << hostBuffer[index] << std::endl; } std::cout << "Finished" << std::endl; } int main() { //capi(); cppapi(); return 0; }