You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
208 lines
6.3 KiB
208 lines
6.3 KiB
#include "Common.h"
|
|
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
|
|
#include "cl.hpp"
|
|
|
|
|
|
#pragma warning( disable : 4996 )
|
|
|
|
/// Waits for an OpenCL event to finish and prints its profiling timestamps.
///
/// Queries CL_PROFILING_COMMAND_START and CL_PROFILING_COMMAND_END for the
/// event and prints both raw counters plus their difference scaled by 1e-6,
/// labelled as milliseconds (OpenCL reports these counters in nanoseconds).
///
/// Failures (null event, failed wait, failed profiling query) are reported on
/// std::cerr; in that case no timing line is printed, so uninitialised or
/// stale values never reach the output.
///
/// @param event Event to inspect. NOTE(review): the profiling queries
///              presumably require a queue created with profiling enabled -
///              confirm at the call sites.
void printTimeStats(cl_event event)
{
    if (event == nullptr)
    {
        std::cerr << "No event object returned!" << std::endl;
        return; // nothing to wait on or query
    }

    const cl_int waitErr = clWaitForEvents(1, &event);
    if (waitErr != CL_SUCCESS)
    {
        std::cerr << "Error while waiting for event [" << waitErr << "]." << std::endl;
        return; // profiling data is not meaningful if the wait failed
    }

    cl_ulong execStart = 0;
    cl_ulong execEnd = 0;

    const cl_int startErr = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &execStart, nullptr);
    if (startErr != CL_SUCCESS)
    {
        std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" << startErr << "]." << std::endl;
    }

    const cl_int endErr = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &execEnd, nullptr);
    if (endErr != CL_SUCCESS)
    {
        std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" << endErr << "]." << std::endl;
    }

    // Both queries are attempted so each failure gets its own diagnostic,
    // but the timing line is only printed when both values are valid.
    if (startErr != CL_SUCCESS || endErr != CL_SUCCESS)
    {
        return;
    }

    std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl;
}
|
|
|
|
|
|
/// Writes an RGB image to `filename` as an uncompressed 24-bit TGA file.
///
/// `data` is read as tightly packed R,G,B byte triplets in row-major order
/// (`width * height * 3` bytes). Rows are emitted starting from the last row
/// of `data` and ending with the first; each pixel is stored in B,G,R order.
/// The file is closed before returning, so its contents are complete on disk
/// as soon as the function returns.
///
/// Only the low 16 bits of `width` and `height` are recorded in the header,
/// because the TGA header fields are two bytes wide.
///
/// @param filename Path of the file to create (truncated if it exists).
/// @param data     Source pixels, 3 bytes per pixel.
/// @param width    Image width in pixels.
/// @param height   Image height in pixels.
///
/// Terminates the process with EXIT_FAILURE if the file cannot be created.
/// Write/close failures are reported on stderr without terminating.
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height)
{
    FILE* f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Unable to create output TGA image `%s'\n", filename);
        exit(EXIT_FAILURE);
    }

    const unsigned char header[18] = {
        0x00,                                              /* ID Length, 0 => No ID */
        0x00,                                              /* Color Map Type, 0 => No color map included */
        0x02,                                              /* Image Type, 2 => Uncompressed, True-color Image */
        0x00, 0x00, 0x00, 0x00, 0x00,                      /* Color map spec: 2 bytes index, 2 bytes length, 1 byte size */
        0x00, 0x00,                                        /* X-origin of Image */
        0x00, 0x00,                                        /* Y-origin of Image */
        static_cast<unsigned char>(width & 0xff),          /* Image Width (little endian) */
        static_cast<unsigned char>((width >> 8) & 0xff),
        static_cast<unsigned char>(height & 0xff),         /* Image Height (little endian) */
        static_cast<unsigned char>((height >> 8) & 0xff),
        0x18,                                              /* Pixel Depth, 0x18 => 24 Bits */
        0x20                                               /* Image Descriptor */
    };
    fwrite(header, 1, sizeof(header), f);

    /* Walk the rows from last to first; the index math is done in size_t so
       large images do not wrap around in 32-bit unsigned arithmetic. */
    for (size_t row = height; row-- > 0;) {
        const unsigned char* pixel = data + row * width * 3;
        for (size_t x = 0; x < width; x++, pixel += 3) {
            fputc(pixel[2], f); /* write blue */
            fputc(pixel[1], f); /* write green */
            fputc(pixel[0], f); /* write red */
        }
    }

    /* Capture the stream error state before fclose invalidates the handle. */
    const bool writeFailed = ferror(f) != 0;
    if (fclose(f) != 0 || writeFailed) {
        fprintf(stderr, "Error while writing output TGA image `%s'\n", filename);
    }
}
|
|
|
|
/// Loads the entire contents of the file at `path` into a string.
///
/// The file is opened in binary mode, so bytes are returned unmodified.
///
/// @param path File to read.
/// @return The file contents, or an empty string if the file could not be opened.
std::string FileToString(const std::string& path) {
    std::ifstream input(path, std::ios::in | std::ios::binary);
    if (!input)
    {
        return std::string();
    }

    // Stream the whole underlying buffer into an in-memory string stream.
    std::ostringstream buffer;
    buffer << input.rdbuf();
    input.close();
    return buffer.str();
}
|
|
|
|
/// Maps an OpenCL status code to the name of its symbolic constant.
///
/// Numeric literals are used instead of the CL_* macros so the table does not
/// depend on which OpenCL header version is being compiled against.
///
/// @param error Status code returned by an OpenCL API call.
/// @return Pointer to a string literal with the constant's name, or
///         "Unknown OpenCL error" for codes not listed here. The returned
///         pointer refers to a literal and must not be freed.
const char* getErrorString(cl_int error)
{
    switch (error) {
    // run-time and JIT compiler errors
    case 0: return "CL_SUCCESS";
    case -1: return "CL_DEVICE_NOT_FOUND";
    case -2: return "CL_DEVICE_NOT_AVAILABLE";
    case -3: return "CL_COMPILER_NOT_AVAILABLE";
    case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
    case -5: return "CL_OUT_OF_RESOURCES";
    case -6: return "CL_OUT_OF_HOST_MEMORY";
    case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
    case -8: return "CL_MEM_COPY_OVERLAP";
    case -9: return "CL_IMAGE_FORMAT_MISMATCH";
    case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
    case -11: return "CL_BUILD_PROGRAM_FAILURE";
    case -12: return "CL_MAP_FAILURE";
    case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
    case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
    case -15: return "CL_COMPILE_PROGRAM_FAILURE";
    case -16: return "CL_LINKER_NOT_AVAILABLE";
    case -17: return "CL_LINK_PROGRAM_FAILURE";
    case -18: return "CL_DEVICE_PARTITION_FAILED";
    case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";

    // invalid-argument / invalid-object errors reported by API calls
    case -30: return "CL_INVALID_VALUE";
    case -31: return "CL_INVALID_DEVICE_TYPE";
    case -32: return "CL_INVALID_PLATFORM";
    case -33: return "CL_INVALID_DEVICE";
    case -34: return "CL_INVALID_CONTEXT";
    case -35: return "CL_INVALID_QUEUE_PROPERTIES";
    case -36: return "CL_INVALID_COMMAND_QUEUE";
    case -37: return "CL_INVALID_HOST_PTR";
    case -38: return "CL_INVALID_MEM_OBJECT";
    case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
    case -40: return "CL_INVALID_IMAGE_SIZE";
    case -41: return "CL_INVALID_SAMPLER";
    case -42: return "CL_INVALID_BINARY";
    case -43: return "CL_INVALID_BUILD_OPTIONS";
    case -44: return "CL_INVALID_PROGRAM";
    case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
    case -46: return "CL_INVALID_KERNEL_NAME";
    case -47: return "CL_INVALID_KERNEL_DEFINITION";
    case -48: return "CL_INVALID_KERNEL";
    case -49: return "CL_INVALID_ARG_INDEX";
    case -50: return "CL_INVALID_ARG_VALUE";
    case -51: return "CL_INVALID_ARG_SIZE";
    case -52: return "CL_INVALID_KERNEL_ARGS";
    case -53: return "CL_INVALID_WORK_DIMENSION";
    case -54: return "CL_INVALID_WORK_GROUP_SIZE";
    case -55: return "CL_INVALID_WORK_ITEM_SIZE";
    case -56: return "CL_INVALID_GLOBAL_OFFSET";
    case -57: return "CL_INVALID_EVENT_WAIT_LIST";
    case -58: return "CL_INVALID_EVENT";
    case -59: return "CL_INVALID_OPERATION";
    case -60: return "CL_INVALID_GL_OBJECT";
    case -61: return "CL_INVALID_BUFFER_SIZE";
    case -62: return "CL_INVALID_MIP_LEVEL";
    case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
    case -64: return "CL_INVALID_PROPERTY";
    case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
    case -66: return "CL_INVALID_COMPILER_OPTIONS";
    case -67: return "CL_INVALID_LINKER_OPTIONS";
    case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
    // codes added by OpenCL 2.0 / 2.2
    case -69: return "CL_INVALID_PIPE_SIZE";
    case -70: return "CL_INVALID_DEVICE_QUEUE";
    case -71: return "CL_INVALID_SPEC_ID";
    case -72: return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";

    // extension errors
    case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
    case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
    case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
    case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
    case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
    case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
    default: return "Unknown OpenCL error";
    }
}
|
|
|
|
/// Tests an OpenCL status code and logs it when it signals a failure.
///
/// @param err Status code to test.
/// @return true if `err` equals CL_SUCCESS; otherwise writes
///         "OpenCL error: <name>" to std::cout and returns false.
bool CheckCLError(cl_int err)
{
    const bool succeeded = (err == CL_SUCCESS);
    if (!succeeded)
    {
        std::cout << "OpenCL error: " << getErrorString(err) << std::endl;
    }
    return succeeded;
}
|
|
|
|
void Timer::start()
|
|
{
|
|
t_start = std::chrono::high_resolution_clock::now();
|
|
}
|
|
|
|
void Timer::end(unsigned int nRuns)
|
|
{
|
|
auto t_end = std::chrono::high_resolution_clock::now();
|
|
std::cout << "CPU [time] " <<
|
|
std::chrono::duration_cast<std::chrono::nanoseconds>(t_end - t_start).count() / 1e+06
|
|
/ nRuns << " ms" << std::endl;
|
|
}
|
|
|
|
void Timer::measure(const std::function<void(void)>& program, unsigned int nRuns)
|
|
{
|
|
start();
|
|
for (unsigned int i = 0; i < nRuns; ++i)
|
|
{
|
|
program();
|
|
}
|
|
end(nRuns);
|
|
|
|
}
|
|
|
|
// Out-of-class definition of the Timer::t_start data member: the timestamp
// written by Timer::start() and read back by Timer::end().
std::chrono::time_point<std::chrono::high_resolution_clock> Timer::t_start;
|