Browse Source

Bugfixes, output formatting

master
trinitas 3 years ago
parent
commit
41f98861d6
  1. 26
      Primitives/Common.cpp
  2. 11
      Primitives/Common.h
  3. 39
      Primitives/OpenCLHandler.cpp
  4. 1
      Primitives/OpenCLHandler.h
  5. 10
      Primitives/Primitives.cpp
  6. 3
      Primitives/primitives/ExclusiveScan.cpp
  7. 4
      Primitives/primitives/Histogram.cpp
  8. 2
      Primitives/primitives/ReduceAdd.cpp
  9. 1
      kernels/programs.cl

26
Primitives/Common.cpp

@ -202,3 +202,29 @@ bool CheckCLError(cl_int err)
return true;
}
/// Stores the current high-resolution clock reading in the shared static
/// `t_start`, which Timer::end() later subtracts from its own reading.
void Timer::start()
{
    using Clock = std::chrono::high_resolution_clock;
    t_start = Clock::now();
}
/// Takes a clock reading, computes the time elapsed since the last
/// Timer::start() and prints the average per run to std::cout in the form
/// "CPU [time] <value> ms".
///
/// @param nRuns Number of runs the elapsed interval covers; the elapsed time
///              is divided by it. A value of 0 is treated as 1 so that the
///              total elapsed time is printed instead of "inf"/"nan" caused
///              by a floating-point division by zero.
void Timer::end(unsigned int nRuns)
{
    const auto t_end = std::chrono::high_resolution_clock::now();

    // A double-based millisecond duration keeps the sub-millisecond fraction
    // without a manual nanosecond -> millisecond conversion factor.
    const std::chrono::duration<double, std::milli> elapsed = t_end - t_start;

    const unsigned int runs = (nRuns == 0) ? 1u : nRuns;
    std::cout << "CPU [time] " << elapsed.count() / runs << " ms" << std::endl;
}
/// Times `nRuns` back-to-back invocations of `program`.
///
/// Calls start() before the first invocation and end(nRuns) after the last
/// one, so the printed figure is the average time per invocation.
///
/// @param program Callable to execute; invoked exactly `nRuns` times.
/// @param nRuns   Number of invocations to perform and to average over.
void Timer::measure(const std::function<void(void)>& program, unsigned int nRuns)
{
    start();

    for (unsigned int remaining = nRuns; remaining != 0; --remaining)
    {
        program();
    }

    end(nRuns);
}
// Out-of-class definition of the static start time point declared in Timer;
// written by Timer::start() and read by Timer::end().
std::chrono::time_point<std::chrono::high_resolution_clock> Timer::t_start;

11
Primitives/Common.h

@ -1,4 +1,6 @@
#pragma once
#include <chrono>
#include <functional>
#include "cl.hpp"
void printTimeStats(cl_event event);
@ -6,3 +8,12 @@ void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width,
// NOTE(review): implementation not visible in this hunk; presumably returns
// the contents of the file at `path` as a string — confirm in Common.cpp.
std::string FileToString(const std::string& path);
// NOTE(review): presumably maps an OpenCL error code to a readable string —
// confirm in Common.cpp.
const char* getErrorString(cl_int error);
// NOTE(review): presumably reports whether `err` indicates success — only the
// tail of the implementation is visible here; confirm in Common.cpp.
bool CheckCLError(cl_int err);
/// Simple stopwatch built on std::chrono::high_resolution_clock.
///
/// Every member is static: there is one shared start point, set by start()
/// and consumed by end(). The class is used through `Timer::...` calls and
/// never instantiated for its state.
class Timer {
public:
    /// Records the current time as the start of a measurement.
    static void start();

    /// Ends the measurement and prints the elapsed time divided by `nRuns`.
    static void end(unsigned int nRuns = 1);

    /// Runs `program` `nRuns` times between start() and end(nRuns).
    static void measure(const std::function<void()>& program, unsigned int nRuns = 10000);

private:
    using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;

    /// Start of the measurement currently in progress.
    static TimePoint t_start;
};

39
Primitives/OpenCLHandler.cpp

@ -2,8 +2,32 @@
#include "OpenCLHandler.h"
#include <iostream>
/// Waits for `event` to complete and prints the execution time of its command
/// to std::cout in the form "GPU [time] <value> ms".
///
/// The time is the difference between the event's CL_PROFILING_COMMAND_END
/// and CL_PROFILING_COMMAND_START values divided by 1e+06.
///
/// If waiting on the event or either profiling query fails, the error code is
/// written to std::cerr and no time is printed, because a figure computed
/// from a failed query would be meaningless.
///
/// @param event Event whose profiling information is queried.
void OpenCLHandler::printTimeStats(cl::Event& event)
{
    cl_int err = event.wait();
    if (err != CL_SUCCESS)
    {
        std::cerr << "Error while waiting for event [" << err << "]." << std::endl;
        return;
    }

    const cl_ulong execStart = event.getProfilingInfo<CL_PROFILING_COMMAND_START>(&err);
    if (err != CL_SUCCESS)
    {
        std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START ["
                  << err << "]." << std::endl;
        return;
    }

    const cl_ulong execEnd = event.getProfilingInfo<CL_PROFILING_COMMAND_END>(&err);
    if (err != CL_SUCCESS)
    {
        std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END ["
                  << err << "]." << std::endl;
        return;
    }

    std::cout << "GPU [time] " << (execEnd - execStart) / 1e+06 << " ms" << std::endl;
}
OpenCLHandler::OpenCLHandler()
{
cl_int err = CL_SUCCESS;
// Get a platform ID
@ -15,7 +39,7 @@ OpenCLHandler::OpenCLHandler()
exit(-1);
}
std::cout << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
std::cout << "Running on: " << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
// Create a context
cl_context_properties properties[] =
@ -24,13 +48,14 @@ OpenCLHandler::OpenCLHandler()
// Enumerate the devices
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
std::cout << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
std::cout << "Global memory: " << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
max_workgroup_size = devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
std::cout << "Max workgroup: " << max_workgroup_size << std::endl << std::endl;
// Create the command queue
cl::Event event;
queue = cl::CommandQueue(context, devices[0], 0, &err);
queue = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
// Create the OpenCL program
std::string programSource = FileToString("../kernels/programs.cl");
program = cl::Program(context, programSource);
@ -39,13 +64,15 @@ OpenCLHandler::OpenCLHandler()
bool OpenCLHandler::run_test(TestCase* test)
{
cl::Event event;
cl::Event gpuEvent;
test->gpu_compute(&context, &queue, &program, &event);
test->gpu_compute(&context, &queue, &program, &gpuEvent);
Timer::measure([&]() {
test->cpu_compute();
}, 5);
event.wait();
printTimeStats(gpuEvent);
test->collect_results(&queue);

1
Primitives/OpenCLHandler.h

@ -8,6 +8,7 @@ private:
cl::Context context;
cl::CommandQueue queue;
size_t max_workgroup_size;
// Waits for `event` and prints the GPU execution time of its command in ms.
void printTimeStats(cl::Event& event);
public:
OpenCLHandler();
bool run_test(TestCase* test);

10
Primitives/Primitives.cpp

@ -60,6 +60,7 @@ void add_tests(std::vector<TestCase*>* tests) {
int main()
{
capi();
OpenCLHandler handler;
std::vector<TestCase*> tests;
@ -67,23 +68,24 @@ int main()
Histogram h1(false, 32, 4096);
Histogram h2(true, 32, 4096);
ReduceAdd r1(handler.get_max_size());
ExclusiveScan e(512);
Compact c(1024);
ExclusiveScan e(handler.get_max_size());
tests.push_back(&s);
tests.push_back(&h1);
tests.push_back(&h2);
tests.push_back(&r1);
tests.push_back(&e);
tests.push_back(&c);
tests.push_back(&e);
for (size_t i = 0; i < tests.size(); i++) {
std::cout << tests[i]->description() << std::endl;
if (handler.run_test(tests[i])) {
std::cout << tests[i]->description() << " - Success" << std::endl;
std::cout << " Success" << std::endl << std::endl;
}
else {
std::cout << tests[i]->description() << " - Failure" << std::endl;
std::cout << " Failure" << std::endl << std::endl;
}
}
return 0;

3
Primitives/primitives/ExclusiveScan.cpp

@ -59,7 +59,8 @@ bool ExclusiveScan::validate_results()
bool success = true;
for (size_t index = 0; index < data_size; index++) {
if (cpuResult[index] != gpuResult[index]) {
std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl;
int diff = abs(cpuResult[index] - gpuResult[index]);
std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << ", diff: " << diff << std::endl;
success = false;
}
}

4
Primitives/primitives/Histogram.cpp

@ -16,7 +16,6 @@ Histogram::Histogram(bool _global, int _valueSet, int _data_size)
for (size_t index = 0; index < data_size; ++index) {
sourceData.push_back(distr(gen));
}
cpuResult.resize(valueSet, 0);
gpuResult.resize(valueSet, 0);
}
@ -66,7 +65,8 @@ void Histogram::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::P
void Histogram::cpu_compute()
{
cpuResult.resize(0, 0);
cpuResult.resize(valueSet, 0);
for (size_t index = 0; index < data_size; ++index) {
cpuResult[sourceData[index]] = cpuResult[sourceData[index]] + 1;
}

2
Primitives/primitives/ReduceAdd.cpp

@ -44,6 +44,7 @@ void ReduceAdd::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::P
void ReduceAdd::cpu_compute()
{
cpuResult = 0;
for (size_t index = 0; index < data_size; ++index) {
cpuResult += sourceData[index];
}
@ -63,6 +64,7 @@ bool ReduceAdd::validate_results()
else
{
std::cout << "Wrong result: " << cpuResult << "!=" << gpuResult[0] << ", diff is " <<diff << std::endl;
return false;
}
}

1
kernels/programs.cl

@ -111,6 +111,7 @@ void exscan_global(__global int* data)
for (int s = 1; s < size; s *= 2) {
int tmp = data[id];
barrier(CLK_LOCAL_MEM_FENCE);
if (id + s < size) {
data[id + s] += data[id];
}

Loading…
Cancel
Save