#include #include "Common.h" #include "PrimitiveTests.h" #include Compact::Compact(size_t _data_size) { data_size = _data_size; limit = 50; result_size = 0; std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution distr(0, 100); for (size_t index = 0; index < data_size; ++index) { int val = distr(gen); sourceData.push_back(val); //gpuResult.push_back(val); } gpuResult.resize(data_size, 0); } void Compact::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) { cl_int err = CL_SUCCESS; cl::Event inner_event; cl::Kernel kernel_predicate = cl::Kernel(*program, "compact_predicate", &err); CheckCLError(err); cl::Kernel kernel_exscan = cl::Kernel(*program, "compact_exscan", &err); CheckCLError(err); cl::Kernel kernel_compact = cl::Kernel(*program, "compact_compact", &err); CheckCLError(err); clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); clResultBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); cl::Buffer pred(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); cl::Buffer prefSum(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); queue->enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, sourceData.data()); kernel_predicate.setArg(0, clInputBuffer); kernel_predicate.setArg(1, pred); queue->enqueueNDRangeKernel(kernel_predicate, cl::NullRange, // Indexek nem eloffszetelve cl::NDRange(data_size, 1), // Minden elemet egy szál cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul NULL, // &inner_event); inner_event.wait(); kernel_exscan.setArg(0, pred); kernel_exscan.setArg(1, prefSum); queue->enqueueNDRangeKernel(kernel_exscan, cl::NullRange, // Indexek nem eloffszetelve cl::NDRange(data_size, 1), // Minden elemet egy szál cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul NULL, // &inner_event); inner_event.wait(); queue->enqueueReadBuffer(prefSum, true, 0, sizeof(int), &result_size); kernel_compact.setArg(0, clInputBuffer); kernel_compact.setArg(1, pred); kernel_compact.setArg(2, prefSum); queue->enqueueNDRangeKernel(kernel_compact, cl::NullRange, // Indexek nem eloffszetelve cl::NDRange(data_size, 1), // Minden elemet egy szál cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul NULL, // Event); gpuResult.resize(result_size, 0); } void Compact::cpu_compute() { for (size_t i = 0; i < data_size; i++) { if (sourceData[i] < limit) { cpuResult.push_back(sourceData[i]); } } } void Compact::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * result_size, gpuResult.data()); } bool Compact::validate_results() { bool success = true; for (size_t index = 0; index < result_size; index++) { if (cpuResult[index] != gpuResult[index]) { std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl; success = false; } } return success; } std::string Compact::description() { return std::string("Compact (data_size=" + std::to_string(data_size) + ")"); }