10 changed files with 13565 additions and 5 deletions
@ -0,0 +1,203 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#include <cstdio> |
||||
|
#include <cstdlib> |
||||
|
#include <iostream> |
||||
|
#include <fstream> |
||||
|
#include <sstream> |
||||
|
|
||||
|
#include "cl.hpp" |
||||
|
|
||||
|
#pragma warning( disable : 4996 ) |
||||
|
|
||||
|
void printTimeStats(cl_event event) |
||||
|
|
||||
|
{ |
||||
|
|
||||
|
cl_int err = CL_SUCCESS; |
||||
|
|
||||
|
if(event == NULL) |
||||
|
|
||||
|
{ |
||||
|
|
||||
|
std::cerr << "No event object returned!" << std::endl; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
else |
||||
|
|
||||
|
{ |
||||
|
|
||||
|
clWaitForEvents(1, &event); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
cl_ulong execStart, execEnd; |
||||
|
|
||||
|
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, |
||||
|
|
||||
|
sizeof(cl_ulong), &execStart, NULL); |
||||
|
|
||||
|
if(err != CL_SUCCESS) |
||||
|
|
||||
|
{ |
||||
|
|
||||
|
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" << err << "]." << std::endl; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
|
||||
|
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, |
||||
|
|
||||
|
sizeof(cl_ulong), &execEnd, NULL); |
||||
|
|
||||
|
if(err != CL_SUCCESS) |
||||
|
|
||||
|
{ |
||||
|
|
||||
|
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" << err << "]." << std::endl; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
|
||||
|
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height) |
||||
|
{ |
||||
|
FILE *f = fopen(filename, "wb"); |
||||
|
if (!f) { |
||||
|
fprintf(stderr, "Unable to create output TGA image `%s'\n", filename); |
||||
|
exit(EXIT_FAILURE); |
||||
|
} |
||||
|
|
||||
|
fputc(0x00, f); /* ID Length, 0 => No ID */ |
||||
|
fputc(0x00, f); /* Color Map Type, 0 => No color map included */ |
||||
|
fputc(0x02, f); /* Image Type, 2 => Uncompressed, True-color Image */ |
||||
|
fputc(0x00, f); /* Next five bytes are about the color map entries */ |
||||
|
fputc(0x00, f); /* 2 bytes Index, 2 bytes length, 1 byte size */ |
||||
|
fputc(0x00, f); |
||||
|
fputc(0x00, f); |
||||
|
fputc(0x00, f); |
||||
|
fputc(0x00, f); /* X-origin of Image */ |
||||
|
fputc(0x00, f); |
||||
|
fputc(0x00, f); /* Y-origin of Image */ |
||||
|
fputc(0x00, f); |
||||
|
fputc(width & 0xff, f); /* Image Width */ |
||||
|
fputc((width >> 8) & 0xff, f); |
||||
|
fputc(height & 0xff, f); /* Image Height */ |
||||
|
fputc((height >> 8) & 0xff, f); |
||||
|
fputc(0x18, f); /* Pixel Depth, 0x18 => 24 Bits */ |
||||
|
fputc(0x20, f); /* Image Descriptor */ |
||||
|
|
||||
|
for (int y = height - 1; y >= 0; y--) { |
||||
|
for (size_t x = 0; x < width; x++) { |
||||
|
const size_t i = (y * width + x) * 3; |
||||
|
fputc(data[i + 2], f); /* write blue */ |
||||
|
fputc(data[i + 1], f); /* write green */ |
||||
|
fputc(data[i], f); /* write red */ |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
std::string FileToString(const std::string& path) { |
||||
|
std::ifstream file(path, std::ios::in | std::ios::binary); |
||||
|
if (file) |
||||
|
{ |
||||
|
std::ostringstream contents; |
||||
|
contents << file.rdbuf(); |
||||
|
file.close(); |
||||
|
return(contents.str()); |
||||
|
} |
||||
|
return std::string(); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
const char *getErrorString(cl_int error) |
||||
|
{ |
||||
|
switch (error) { |
||||
|
// run-time and JIT compiler errors
|
||||
|
case 0: return "CL_SUCCESS"; |
||||
|
case -1: return "CL_DEVICE_NOT_FOUND"; |
||||
|
case -2: return "CL_DEVICE_NOT_AVAILABLE"; |
||||
|
case -3: return "CL_COMPILER_NOT_AVAILABLE"; |
||||
|
case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; |
||||
|
case -5: return "CL_OUT_OF_RESOURCES"; |
||||
|
case -6: return "CL_OUT_OF_HOST_MEMORY"; |
||||
|
case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; |
||||
|
case -8: return "CL_MEM_COPY_OVERLAP"; |
||||
|
case -9: return "CL_IMAGE_FORMAT_MISMATCH"; |
||||
|
case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; |
||||
|
case -11: return "CL_BUILD_PROGRAM_FAILURE"; |
||||
|
case -12: return "CL_MAP_FAILURE"; |
||||
|
case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; |
||||
|
case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; |
||||
|
case -15: return "CL_COMPILE_PROGRAM_FAILURE"; |
||||
|
case -16: return "CL_LINKER_NOT_AVAILABLE"; |
||||
|
case -17: return "CL_LINK_PROGRAM_FAILURE"; |
||||
|
case -18: return "CL_DEVICE_PARTITION_FAILED"; |
||||
|
case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; |
||||
|
|
||||
|
// compile-time errors
|
||||
|
case -30: return "CL_INVALID_VALUE"; |
||||
|
case -31: return "CL_INVALID_DEVICE_TYPE"; |
||||
|
case -32: return "CL_INVALID_PLATFORM"; |
||||
|
case -33: return "CL_INVALID_DEVICE"; |
||||
|
case -34: return "CL_INVALID_CONTEXT"; |
||||
|
case -35: return "CL_INVALID_QUEUE_PROPERTIES"; |
||||
|
case -36: return "CL_INVALID_COMMAND_QUEUE"; |
||||
|
case -37: return "CL_INVALID_HOST_PTR"; |
||||
|
case -38: return "CL_INVALID_MEM_OBJECT"; |
||||
|
case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; |
||||
|
case -40: return "CL_INVALID_IMAGE_SIZE"; |
||||
|
case -41: return "CL_INVALID_SAMPLER"; |
||||
|
case -42: return "CL_INVALID_BINARY"; |
||||
|
case -43: return "CL_INVALID_BUILD_OPTIONS"; |
||||
|
case -44: return "CL_INVALID_PROGRAM"; |
||||
|
case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; |
||||
|
case -46: return "CL_INVALID_KERNEL_NAME"; |
||||
|
case -47: return "CL_INVALID_KERNEL_DEFINITION"; |
||||
|
case -48: return "CL_INVALID_KERNEL"; |
||||
|
case -49: return "CL_INVALID_ARG_INDEX"; |
||||
|
case -50: return "CL_INVALID_ARG_VALUE"; |
||||
|
case -51: return "CL_INVALID_ARG_SIZE"; |
||||
|
case -52: return "CL_INVALID_KERNEL_ARGS"; |
||||
|
case -53: return "CL_INVALID_WORK_DIMENSION"; |
||||
|
case -54: return "CL_INVALID_WORK_GROUP_SIZE"; |
||||
|
case -55: return "CL_INVALID_WORK_ITEM_SIZE"; |
||||
|
case -56: return "CL_INVALID_GLOBAL_OFFSET"; |
||||
|
case -57: return "CL_INVALID_EVENT_WAIT_LIST"; |
||||
|
case -58: return "CL_INVALID_EVENT"; |
||||
|
case -59: return "CL_INVALID_OPERATION"; |
||||
|
case -60: return "CL_INVALID_GL_OBJECT"; |
||||
|
case -61: return "CL_INVALID_BUFFER_SIZE"; |
||||
|
case -62: return "CL_INVALID_MIP_LEVEL"; |
||||
|
case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; |
||||
|
case -64: return "CL_INVALID_PROPERTY"; |
||||
|
case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; |
||||
|
case -66: return "CL_INVALID_COMPILER_OPTIONS"; |
||||
|
case -67: return "CL_INVALID_LINKER_OPTIONS"; |
||||
|
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; |
||||
|
|
||||
|
// extension errors
|
||||
|
case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; |
||||
|
case -1001: return "CL_PLATFORM_NOT_FOUND_KHR"; |
||||
|
case -1002: return "CL_INVALID_D3D10_DEVICE_KHR"; |
||||
|
case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR"; |
||||
|
case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"; |
||||
|
case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"; |
||||
|
default: return "Unknown OpenCL error"; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
bool CheckCLError(cl_int err) |
||||
|
{ |
||||
|
if(err != CL_SUCCESS) |
||||
|
{ |
||||
|
std::cout << "OpenCL error: " << getErrorString(err) << std::endl; |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
@ -0,0 +1,230 @@ |
|||||
|
// Primitives.cpp : Defines the entry point for the console application.
|
||||
|
|
||||
|
#include <string> |
||||
|
#include <vector> |
||||
|
#include "Common.h" |
||||
|
|
||||
|
// OpenCL C API
|
||||
|
#include <CL/opencl.h> |
||||
|
|
||||
|
// OpenCL C++ API
|
||||
|
#include "cl.hpp" |
||||
|
|
||||
|
// Gaussian elimination
|
||||
|
const int GAn = 4; |
||||
|
const int GAm = 3; |
||||
|
|
||||
|
float GA[] = { 2, 1, -1, 8, |
||||
|
-3, -1, 2, -11, |
||||
|
-2, 1, 2, -3 }; |
||||
|
|
||||
|
int GBn = 6; |
||||
|
int GBm = 3; |
||||
|
float GB[] = { 2, -1, 0, 1, 0, 0, |
||||
|
-1, 2, -1, 0, 1, 0, |
||||
|
0, -1, 2, 0, 0, 1 }; |
||||
|
|
||||
|
|
||||
|
void scalarMV(int n, int m, float* y, const float* A, const float* x, const float* b) { |
||||
|
for (int i = 0; i<n; ++i) { |
||||
|
float yi = b[i]; |
||||
|
for (int j = 0; j<m; ++j) { |
||||
|
yi += A[i * m + j] * x[j]; |
||||
|
} |
||||
|
y[i] = yi; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Jacobi iteration
|
||||
|
const int Jn = 8; |
||||
|
float* Jx[2] = { NULL, NULL }; |
||||
|
float* JA = NULL; |
||||
|
float* Jb = NULL; |
||||
|
|
||||
|
void generateLinEq() |
||||
|
{ |
||||
|
Jx[0] = new float[Jn]; |
||||
|
Jx[1] = new float[Jn]; |
||||
|
for (int i = 0; i < Jn; ++i) { |
||||
|
Jx[0][i] = 0.0f; |
||||
|
Jx[1][i] = 0.0f; |
||||
|
} |
||||
|
|
||||
|
JA = new float[Jn * Jn]; |
||||
|
for (int i = 0; i < Jn; ++i) { |
||||
|
for (int j = 0; j < Jn; ++j) { |
||||
|
float v = 0.0f; |
||||
|
if (i == j) { |
||||
|
v = 0.5f; |
||||
|
} |
||||
|
JA[i + j * Jn] = v; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
Jb = new float[Jn]; |
||||
|
for (int i = 0; i < Jn; ++i) { |
||||
|
Jb[i] = 1.0f; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
void releaseLinEq() |
||||
|
{ |
||||
|
if (Jx[0] == 0) delete[] Jx[0]; |
||||
|
if (Jx[1] == 0) delete[] Jx[1]; |
||||
|
if (JA == 0) delete[] JA; |
||||
|
if (Jb == 0) delete[] Jb; |
||||
|
} |
||||
|
|
||||
|
void capi() |
||||
|
{ |
||||
|
// Get a platform ID
|
||||
|
cl_platform_id platformID; |
||||
|
clGetPlatformIDs(1, &platformID, NULL); |
||||
|
|
||||
|
// Get a device ID
|
||||
|
cl_device_id deviceID; |
||||
|
clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL); |
||||
|
|
||||
|
// Create a context
|
||||
|
cl_context context; |
||||
|
cl_context_properties contextProperties[] = |
||||
|
{ CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 }; |
||||
|
context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL); |
||||
|
|
||||
|
// Create a command queue
|
||||
|
cl_command_queue queue; |
||||
|
queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL); |
||||
|
|
||||
|
// Create an OpenCL program
|
||||
|
std::string source = FileToString("../kernels/programs.cl"); |
||||
|
const char* csource = source.c_str(); |
||||
|
cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL); |
||||
|
cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); |
||||
|
if (err != CL_SUCCESS) |
||||
|
{ |
||||
|
cl_uint logLength; |
||||
|
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength); |
||||
|
char* log = new char[logLength]; |
||||
|
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0); |
||||
|
std::cout << log << std::endl; |
||||
|
delete[] log; |
||||
|
exit(-1); |
||||
|
} |
||||
|
|
||||
|
// Get the kernel handle
|
||||
|
cl_kernel kernel = clCreateKernel(program, "gaussian", &err); |
||||
|
if(!CheckCLError(err)) exit(-1); |
||||
|
|
||||
|
// Allocate and upload the input data
|
||||
|
// ...
|
||||
|
|
||||
|
cl_mem inputBuffer; |
||||
|
inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * GAn * GAm, NULL, &err); |
||||
|
if (!CheckCLError(err)) exit(-1); |
||||
|
|
||||
|
clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, sizeof(float) * GAn * GAm, GA, 0, NULL, NULL); |
||||
|
|
||||
|
|
||||
|
// Set the kernel paramateres
|
||||
|
clSetKernelArg(kernel, 0, sizeof(int), &GAn); |
||||
|
clSetKernelArg(kernel, 1, sizeof(int), &GAm); |
||||
|
clSetKernelArg(kernel, 2, sizeof(cl_mem), &inputBuffer); |
||||
|
// Enqueue the kernel
|
||||
|
size_t workSize = GAm; |
||||
|
size_t workGroupSize = GAm; |
||||
|
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &workSize, &workGroupSize, 0, NULL, NULL); |
||||
|
|
||||
|
// Copy the result back to the host
|
||||
|
clEnqueueReadBuffer(queue, inputBuffer, CL_TRUE, 0, sizeof(float) * GAm * GAn, GA, 0, NULL, NULL); |
||||
|
|
||||
|
for (int i = 0; i < GAm; ++i) { |
||||
|
for (int j = 0; j < GAn; ++j) { |
||||
|
std::cout << GA[j + i * GAn]; |
||||
|
if (j < GAn - 1) std::cout << ", "; |
||||
|
} |
||||
|
std::cout << std::endl; |
||||
|
} |
||||
|
|
||||
|
clReleaseMemObject(inputBuffer); |
||||
|
clReleaseKernel(kernel); |
||||
|
|
||||
|
std::cout << "Finished" << std::endl; |
||||
|
} |
||||
|
|
||||
|
void cppapi() |
||||
|
{ |
||||
|
cl_int err = CL_SUCCESS; |
||||
|
|
||||
|
// Get a platform ID
|
||||
|
std::vector<cl::Platform> platforms; |
||||
|
cl::Platform::get(&platforms); |
||||
|
if (platforms.size() == 0) |
||||
|
{ |
||||
|
std::cout << "Unable to find suitable platform." << std::endl; |
||||
|
exit(-1); |
||||
|
} |
||||
|
|
||||
|
// Create a context
|
||||
|
cl_context_properties properties[] = |
||||
|
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; |
||||
|
cl::Context context(CL_DEVICE_TYPE_GPU, properties); |
||||
|
|
||||
|
// Enumerate the devices
|
||||
|
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); |
||||
|
|
||||
|
// Create the command queue
|
||||
|
cl::Event event; |
||||
|
cl::CommandQueue queue(context, devices[0], 0, &err); |
||||
|
|
||||
|
// Create the OpenCL program
|
||||
|
std::string programSource = FileToString("../kernels/programs.cl"); |
||||
|
cl::Program program = cl::Program(context, programSource); |
||||
|
program.build(devices); |
||||
|
|
||||
|
// Get the kernel handle
|
||||
|
cl::Kernel kernel(program, "gaussian", &err); |
||||
|
CheckCLError(err); |
||||
|
|
||||
|
// Allocate and upload the input data
|
||||
|
// ...
|
||||
|
|
||||
|
cl::Buffer clInputBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * GAn * GAm, NULL, &err); |
||||
|
queue.enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(float) * GAn * GAm, GA); |
||||
|
|
||||
|
|
||||
|
// Set the kernel parameters
|
||||
|
kernel.setArg(0, GAn); |
||||
|
kernel.setArg(1, GAm); |
||||
|
kernel.setArg(2, clInputBuffer); |
||||
|
|
||||
|
// Enqueue the kernel
|
||||
|
queue.enqueueNDRangeKernel(kernel, |
||||
|
cl::NullRange, |
||||
|
cl::NDRange(GAm, 1), |
||||
|
cl::NDRange(GAm, 1), |
||||
|
NULL, |
||||
|
&event); |
||||
|
event.wait(); |
||||
|
|
||||
|
// Copy result back to host
|
||||
|
queue.enqueueReadBuffer(clInputBuffer, true, 0, sizeof(float) * GAn * GAm, GA); |
||||
|
|
||||
|
// Validate the result
|
||||
|
for (int i = 0; i < GAm; ++i) { |
||||
|
for (int j = 0; j < GAn; ++j) { |
||||
|
std::cout << GA[j + i * GAn]; |
||||
|
if (j < GAn - 1) std::cout << ", "; |
||||
|
} |
||||
|
std::cout << std::endl; |
||||
|
} |
||||
|
|
||||
|
std::cout << "Finished" << std::endl; |
||||
|
} |
||||
|
|
||||
|
int main() |
||||
|
{ |
||||
|
capi(); |
||||
|
cppapi(); |
||||
|
return 0; |
||||
|
} |
||||
|
|
@ -0,0 +1,83 @@ |
|||||
|
<?xml version="1.0" encoding="utf-8"?> |
||||
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
|
<ItemGroup Label="ProjectConfigurations"> |
||||
|
<ProjectConfiguration Include="Debug|Win32"> |
||||
|
<Configuration>Debug</Configuration> |
||||
|
<Platform>Win32</Platform> |
||||
|
</ProjectConfiguration> |
||||
|
<ProjectConfiguration Include="Release|Win32"> |
||||
|
<Configuration>Release</Configuration> |
||||
|
<Platform>Win32</Platform> |
||||
|
</ProjectConfiguration> |
||||
|
</ItemGroup> |
||||
|
<PropertyGroup Label="Globals"> |
||||
|
<ProjectGuid>{FD14D4C8-8321-4451-A5DA-68E3FCCFCAC8}</ProjectGuid> |
||||
|
<RootNamespace>Linear</RootNamespace> |
||||
|
</PropertyGroup> |
||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> |
||||
|
<ConfigurationType>Application</ConfigurationType> |
||||
|
<UseDebugLibraries>true</UseDebugLibraries> |
||||
|
<CharacterSet>MultiByte</CharacterSet> |
||||
|
</PropertyGroup> |
||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> |
||||
|
<ConfigurationType>Application</ConfigurationType> |
||||
|
<UseDebugLibraries>false</UseDebugLibraries> |
||||
|
<WholeProgramOptimization>true</WholeProgramOptimization> |
||||
|
<CharacterSet>MultiByte</CharacterSet> |
||||
|
</PropertyGroup> |
||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
||||
|
<ImportGroup Label="ExtensionSettings"> |
||||
|
</ImportGroup> |
||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
|
</ImportGroup> |
||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
||||
|
</ImportGroup> |
||||
|
<PropertyGroup Label="UserMacros" /> |
||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
||||
|
<OutDir>$(SolutionDir)bin\</OutDir> |
||||
|
</PropertyGroup> |
||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
||||
|
<ClCompile> |
||||
|
<WarningLevel>Level3</WarningLevel> |
||||
|
<Optimization>Disabled</Optimization> |
||||
|
<AdditionalIncludeDirectories>$(CUDA_PATH)\include</AdditionalIncludeDirectories> |
||||
|
</ClCompile> |
||||
|
<Link> |
||||
|
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
|
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32\</AdditionalLibraryDirectories> |
||||
|
<AdditionalDependencies>OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> |
||||
|
</Link> |
||||
|
</ItemDefinitionGroup> |
||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
||||
|
<ClCompile> |
||||
|
<WarningLevel>Level3</WarningLevel> |
||||
|
<Optimization>MaxSpeed</Optimization> |
||||
|
<FunctionLevelLinking>true</FunctionLevelLinking> |
||||
|
<IntrinsicFunctions>true</IntrinsicFunctions> |
||||
|
<AdditionalIncludeDirectories>$(CUDA_PATH)\include</AdditionalIncludeDirectories> |
||||
|
</ClCompile> |
||||
|
<Link> |
||||
|
<GenerateDebugInformation>true</GenerateDebugInformation> |
||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
||||
|
<OptimizeReferences>true</OptimizeReferences> |
||||
|
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32\</AdditionalLibraryDirectories> |
||||
|
<AdditionalDependencies>OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> |
||||
|
</Link> |
||||
|
</ItemDefinitionGroup> |
||||
|
<ItemGroup> |
||||
|
<ClInclude Include="cl.hpp" /> |
||||
|
<ClInclude Include="Common.h" /> |
||||
|
</ItemGroup> |
||||
|
<ItemGroup> |
||||
|
<ClCompile Include="Linear.cpp" /> |
||||
|
</ItemGroup> |
||||
|
<ItemGroup> |
||||
|
<None Include="..\kernels\linear.cl" /> |
||||
|
</ItemGroup> |
||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
||||
|
<ImportGroup Label="ExtensionTargets"> |
||||
|
</ImportGroup> |
||||
|
</Project> |
@ -0,0 +1,38 @@ |
|||||
|
<?xml version="1.0" encoding="utf-8"?> |
||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
||||
|
<ItemGroup> |
||||
|
<Filter Include="Source Files"> |
||||
|
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> |
||||
|
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> |
||||
|
</Filter> |
||||
|
<Filter Include="Header Files"> |
||||
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> |
||||
|
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions> |
||||
|
</Filter> |
||||
|
<Filter Include="Resource Files"> |
||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> |
||||
|
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> |
||||
|
</Filter> |
||||
|
<Filter Include="Kernels"> |
||||
|
<UniqueIdentifier>{fd13ccc5-a98b-4e30-9eba-12f62c7dd566}</UniqueIdentifier> |
||||
|
</Filter> |
||||
|
</ItemGroup> |
||||
|
<ItemGroup> |
||||
|
<ClInclude Include="cl.hpp"> |
||||
|
<Filter>Header Files</Filter> |
||||
|
</ClInclude> |
||||
|
<ClInclude Include="Common.h"> |
||||
|
<Filter>Header Files</Filter> |
||||
|
</ClInclude> |
||||
|
</ItemGroup> |
||||
|
<ItemGroup> |
||||
|
<ClCompile Include="Linear.cpp"> |
||||
|
<Filter>Source Files</Filter> |
||||
|
</ClCompile> |
||||
|
</ItemGroup> |
||||
|
<ItemGroup> |
||||
|
<None Include="..\kernels\linear.cl"> |
||||
|
<Filter>Kernels</Filter> |
||||
|
</None> |
||||
|
</ItemGroup> |
||||
|
</Project> |
File diff suppressed because it is too large
@ -0,0 +1,58 @@ |
|||||
|
// TODO: Simple matrix-vector multiplication, every thread computes a complete dot product |
||||
|
// |
||||
|
// i := get_global_id(0) |
||||
|
// |
||||
|
// IF ID < n THEN: |
||||
|
// yi := b[i] |
||||
|
// LOOP j := 0 .. m DO: |
||||
|
// yi += A[j + i * m] * x[j] |
||||
|
// END LOOP |
||||
|
// y[i] := yi |
||||
|
// END IF |
||||
|
__kernel |
||||
|
void simpleMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b){ |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// TODO: Matrix-vector multiplication with parallelization of the dot product |
||||
|
// Assumptions: M = 2^k, M <= maximum workgroup size |
||||
|
// |
||||
|
// i = get_group_id(0) |
||||
|
// j = get_local_id(0) |
||||
|
// |
||||
|
// Q[j] := A[i * M + j] * x[j] |
||||
|
// BARRIER |
||||
|
// |
||||
|
// Sum scan on Q (reduction) |
||||
|
// |
||||
|
// IF j = 0 THEN: |
||||
|
// y[i] = Q[0] + b[i] |
||||
|
// |
||||
|
__kernel |
||||
|
void reduceMV(const int n, const int M, __global float* y, __global float* A, __global float* x, __global float* b, __local float* Q){ |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// TODO: General solution for matrix-vector multiplication, every thread processes a chunk of the dot product and visits multiple rows of the result |
||||
|
// |
||||
|
// t := get_local_id(0) / Z |
||||
|
// z := get_local_id(0) % Z |
||||
|
// |
||||
|
// FOR i := t ; i < n ; i := i + T : |
||||
|
// Compute Q[t * Z + z] as shown in the lecture |
||||
|
// Sum scan on Q (reduction) |
||||
|
// IF z = 0 THEN: |
||||
|
// y[i] = Q[t * Z + 0] + b[i] |
||||
|
// |
||||
|
// END FOR |
||||
|
__kernel |
||||
|
void largeMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b, const int T, const int Z, __local float* Q){ |
||||
|
|
||||
|
} |
||||
|
|
||||
|
// TODO: Gaussian elimination as shown in the lecture |
||||
|
// (execute the 2nd loop of the sequential implemential in parallel) |
||||
|
__kernel void gaussian(const int n, const int m, __global float* A){ |
||||
|
|
||||
|
} |
||||
|
|
Loading…
Reference in new issue