#include #include "Common.h" #include "PrimitiveTests.h" #include ExclusiveScan::ExclusiveScan(size_t max_size) { // Must be power of 2, breaks above 512 data_size = max_size; std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution distr(0, 100); for (size_t index = 0; index < data_size; ++index) { int val = distr(gen); sourceData.push_back(val); gpuResult.push_back(val); } } void ExclusiveScan::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) { cl_int err = CL_SUCCESS; cl::Kernel kernel = cl::Kernel(*program, "exscan_global", &err); CheckCLError(err); clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); CheckCLError(err); queue->enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); kernel.setArg(0, clInputBuffer); queue->enqueueNDRangeKernel(kernel, cl::NullRange, // Indexek nem eloffszetelve cl::NDRange(data_size, 1), // Minden elemet egy szál cl::NDRange(data_size, 1), // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul NULL, // Event); } void ExclusiveScan::cpu_compute() { for (size_t i = 0; i < data_size; i++) { int tmp = 0; for (size_t j = 0; j < i; j++) { tmp += sourceData[j]; } cpuResult.push_back(tmp); } } void ExclusiveScan::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); } bool ExclusiveScan::validate_results() { bool success = true; for (size_t index = 0; index < data_size; index++) { if (cpuResult[index] != gpuResult[index]) { int diff = abs(cpuResult[index] - gpuResult[index]); std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << ", diff: " << diff << std::endl; success = false; } } return success; } std::string ExclusiveScan::description() { return std::string("ExclusiveScan (data_size=" + std::to_string(data_size) + ")"); }