// MonteCarlo.cpp : Defines the entry point for the console application. #include #include #include "Common.h" // OpenCL C API #include // OpenCL C++ API #include "cl.hpp" #include #include const bool writeOutRandoms = true; const size_t randomNumbers = 1024; const size_t threadCount = 512; void capi() { // Get a platform ID cl_platform_id platformID; clGetPlatformIDs(1, &platformID, NULL); // Get a device ID cl_device_id deviceID; clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL); // Create a context cl_context context; cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 }; context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL); // Create a command queue cl_command_queue queue; queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL); // Create an OpenCL program std::string source = FileToString("../kernels/montecarlo.cl"); const char* csource = source.c_str(); cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL); cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); if (err != CL_SUCCESS) { cl_uint logLength; clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength); char* log = new char[logLength]; clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0); std::cout << log << std::endl; delete[] log; exit(-1); } // Get the kernel handle cl_kernel kernel = clCreateKernel(program, "randomLCG", &err); if(!CheckCLError(err)) exit(-1); // Allocate memory for random numbers and random seeds // Every thread receives a private seed, stored in seedBuffer/clSeedBuffer // Every thread is supposed to generate randomNumbers random numbers std::vector randomBuffer(threadCount * randomNumbers); std::vector seedBuffer(threadCount); for (int i = 0; i < threadCount; ++i) { seedBuffer[i] = rand(); } cl_mem randomBufferDev; randomBufferDev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)* threadCount * randomNumbers, NULL, &err); if (!CheckCLError(err)) exit(-1); cl_mem seedBufferDev; seedBufferDev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * threadCount, NULL, &err); if (!CheckCLError(err)) exit(-1); clEnqueueWriteBuffer(queue, seedBufferDev, CL_TRUE, 0, sizeof(float) * threadCount, seedBuffer.data(), 0, NULL, NULL); // Set the kernel paramateres clSetKernelArg(kernel, 0, sizeof(int), &randomNumbers); clSetKernelArg(kernel, 1, sizeof(cl_mem), &seedBufferDev); clSetKernelArg(kernel, 2, sizeof(cl_mem), &randomBufferDev); // Enqueue the kernel: threadCount threads in total, each generating randomNumbers random numbers in [0,1] clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &threadCount, NULL, 0, NULL, NULL); // Copy the result back to the host clEnqueueReadBuffer(queue, randomBufferDev, CL_TRUE, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data(), 0, NULL, NULL); // Write out the output if(writeOutRandoms == true) { std::ofstream ofs("randoms.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; } ofs << std::endl; } ofs.close(); } std::cout << "Finished" << std::endl; } void cppapi() { cl_int err = CL_SUCCESS; // Get a platform ID std::vector platforms; cl::Platform::get(&platforms); if (platforms.size() == 0) { std::cout << "Unable to find suitable platform." << std::endl; exit(-1); } // Create a context cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; cl::Context context(CL_DEVICE_TYPE_GPU, properties); // Enumerate the devices std::vector devices = context.getInfo(); // Create the command queue cl::Event event; cl::CommandQueue queue(context, devices[0], 0, &err); // Create the OpenCL program std::string programSource = FileToString("../kernels/programs.cl"); cl::Program program = cl::Program(context, programSource); err = program.build(devices); if (!CheckCLError(err)) { for (size_t devID = 0; devID < devices.size(); ++devID) { std::cout << "Device: " << devID << std::endl; std::cout << "Build Status: " << program.getBuildInfo(devices[devID]) << std::endl; std::cout << "Build Options:\t" << program.getBuildInfo(devices[devID]) << std::endl; std::cout << "Build Log:\t " << program.getBuildInfo(devices[devID]) << std::endl; std::cout << "--------------------------------------------------" << std::endl; } } // Get the kernel handle cl::Kernel kernel(program, "randomLCG", &err); CheckCLError(err); // Allocate memory for random numbers and random seeds // Every thread receives a private seed, stored in seedBuffer/clSeedBuffer // Every thread is supposed to generate randomNumbers random numbers std::vector randomBuffer(threadCount * randomNumbers); std::vector seedBuffer(threadCount); for (int i = 0; i < threadCount; ++i) { seedBuffer[i] = rand(); } cl::Buffer clSeedBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * threadCount, NULL, &err); queue.enqueueWriteBuffer(clSeedBuffer, true, 0, sizeof(float) * threadCount, seedBuffer.data()); cl::Buffer clRandomBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * threadCount * randomNumbers, NULL, &err); // Set the kernel parameters kernel.setArg(0, randomNumbers); kernel.setArg(1, clSeedBuffer); kernel.setArg(2, clRandomBuffer); // Enqueue the kernel: threadCount threads in total, each generating random numbers in [0,1] randomNumbers times queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(threadCount, 1), cl::NullRange, NULL, &event); event.wait(); // Copy result back to host queue.enqueueReadBuffer(clRandomBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); // Write out the output to file as "index value" rows if(writeOutRandoms == true) { std::ofstream ofs("randoms2.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; } } ofs.close(); } std::cout << "Finished" << std::endl; } int main() { capi(); cppapi(); return 0; }