Browse Source

Gauss basic

master
Daniel Gyulai 3 years ago
parent
commit
24fbbf8cc3
  1. 59
      Linear/Gauss.cpp
  2. 15
      Linear/Linear.cpp
  3. 1
      Linear/Linear.vcxproj
  4. 3
      Linear/Linear.vcxproj.filters
  5. 15
      Linear/LinearTests.h
  6. 29
      kernels/linear.cl

59
Linear/Gauss.cpp

@ -0,0 +1,59 @@
#include "LinearTests.h"
#include "cl.hpp"
#include "Common.h"
#include <iostream>
// Stores the matrix dimensions and the caller's data pointer.
// The pointer is kept as-is; the matrix is not copied.
Gauss::Gauss(int _n, int _m, float* _G)
    : n(_n), m(_m), G(_G)
{
}
// Copies the device buffer back into the host matrix G with a blocking read.
void Gauss::collect_results(cl::CommandQueue* queue)
{
    // Whole matrix: n * m floats.
    const size_t byteCount = sizeof(float) * n * m;
    queue->enqueueReadBuffer(clInputBuffer, true, 0, byteCount, G);
}
// Uploads the matrix G to the device and launches the "gaussian" kernel on it.
//
// context - OpenCL context the device buffer is allocated in.
// queue   - command queue used for the upload and the kernel launch.
// program - program object the "gaussian" kernel is created from.
// Event   - event object handed to the kernel launch.
//
// Every OpenCL status code is passed to CheckCLError.
void Gauss::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
{
    cl_int err = CL_SUCCESS;
    cl::Kernel kernel = cl::Kernel(*program, "gaussian", &err);
    CheckCLError(err);

    // Whole matrix: n * m floats.
    const size_t byteCount = sizeof(float) * n * m;

    // The kernel updates the matrix in place and collect_results() reads the
    // same buffer back, so the buffer must be writable from the device
    // (it was previously created with CL_MEM_READ_ONLY).
    clInputBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, byteCount, NULL, &err);
    CheckCLError(err);

    // Blocking write: G is fully uploaded before the kernel is enqueued.
    err = queue->enqueueWriteBuffer(clInputBuffer, true, 0, byteCount, G);
    CheckCLError(err);

    err = kernel.setArg(0, n);
    CheckCLError(err);
    err = kernel.setArg(1, m);
    CheckCLError(err);
    err = kernel.setArg(2, clInputBuffer);
    CheckCLError(err);

    // Global size equals local size, so all n work-items run in one work-group.
    err = queue->enqueueNDRangeKernel(kernel,
        cl::NullRange,
        cl::NDRange(n, 1),
        cl::NDRange(n, 1),
        NULL,
        Event);
    CheckCLError(err);
}
// CPU counterpart of gpu_compute(). Currently a no-op: the body is empty,
// so no host-side computation is performed.
void Gauss::cpu_compute()
{
}
bool Gauss::validate_results()
{
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
std::cout << G[j + i * n];
if (j < n - 1) std::cout << ", ";
}
std::cout << std::endl;
}
return true;
}
// Returns the textual description of this test case; currently an empty string.
std::string Gauss::description()
{
    std::string label;
    return label;
}

15
Linear/Linear.cpp

@ -62,8 +62,19 @@ int main()
OpenCLHandler handler("../kernels/linear.cl");
//Jacobi j(MVType::ReduceMV);
//Jacobi j(MVType::SimpleMV);
Jacobi j(MVType::LargeMV);
handler.run_test(&j);
//Jacobi j(MVType::LargeMV);
//handler.run_test(&j);
int GAn = 4;
int GAm = 3;
float GA[] = { 2, 1, -1, 8,
-3, -1, 2, -11,
-2, 1, 2, -3 };
Gauss g(GAn, GAm, GA);
handler.run_test(&g);
return 0;
}

1
Linear/Linear.vcxproj

@ -71,6 +71,7 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="Gauss.cpp" />
<ClCompile Include="Jacobi.cpp" />
<ClCompile Include="Large.cpp" />
<ClCompile Include="Linear.cpp" />

3
Linear/Linear.vcxproj.filters

@ -33,6 +33,9 @@
<ClCompile Include="Large.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Gauss.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\kernels\linear.cl">

15
Linear/LinearTests.h

@ -43,6 +43,21 @@ public:
std::string description();
};
// Test case that runs the "gaussian" OpenCL kernel on a dense float matrix.
class Gauss : public TestCase {
private:
// Row length (number of columns); element (row, col) is G[col + row * n].
int n;
// Number of rows.
int m;
// Caller-provided matrix data, n * m floats; the pointer is stored, not copied.
float* G;
public:
// Stores the dimensions and the data pointer.
Gauss(int _n, int _m, float* _G);
// Reads the device buffer back into G.
void collect_results(cl::CommandQueue* queue);
// Uploads G to the device and enqueues the "gaussian" kernel.
void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event);
// CPU counterpart; its definition is currently empty.
void cpu_compute();
// Prints the matrix; currently always returns true.
bool validate_results();
// Returns a description string; currently empty.
std::string description();
};
class Simple : public MatrixVectorMultiplier {
public:
Simple(cl::Context* context, cl::CommandQueue* queue, cl::Program* program);

29
kernels/linear.cl

@ -74,9 +74,6 @@ void reduceMV(const int n, const int M, __global float* y, __global float* A, __
// END FOR
__kernel
void largeMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b, const int T, const int Z, __local float* Q){
int id = get_local_id(0);
// WG size: T*Z 4
// ID 2
int t = get_local_id(0) / T; // sor 1
int z = get_local_id(0) % Z; // oszlop 0
@ -117,7 +114,7 @@ void largeMV(const int n, const int m, __global float* y, __global float* A, __g
// end for
// (execute the 2nd loop of the sequential implemential in parallel)
__kernel void gaussian(const int n, const int m, __global float* A){
int i = get_global_id(0);
int j = get_global_id(0);
int lid = get_local_id(0);
/*if (i < n) {
@ -128,12 +125,28 @@ __kernel void gaussian(const int n, const int m, __global float* A){
}
}
}*/
for (size_t k = 1; k < n - 1; k++) {
float l = A[k * n + i] / A[k * n + k];
for (size_t j = k; j < n; j++) {
A[i * n + j] = A[i * n + j] - l * A[k * n + j];
for (size_t k = 0; k < m; k++) {
for (size_t i = 0; i < m; i++) {
if (k != i) {
float ratio = A[i * n + k] / A[k * n + k];
barrier(CLK_LOCAL_MEM_FENCE);
//for (size_t j = 0; j < n; j++) {
A[i * n + j] = A[i * n + j] - ratio * A[k * n + j];
//}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (j < m) {
float ref = A[j * n + j];
A[j] = ref;
for (size_t i = 0; i < n; i++) {
A[j * n + i] = A[j * n + i] / ref;
}
}
//A[i] = i;
}

Loading…
Cancel
Save