diff --git a/Linear/Large.cpp b/Linear/Large.cpp
index 56b8fa2..fe5ce48 100644
--- a/Linear/Large.cpp
+++ b/Linear/Large.cpp
@@ -1,8 +1,10 @@
 #include "LinearTests.h"
 
-Large::Large(cl::Context* context, cl::CommandQueue* queue, cl::Program* program)
+Large::Large(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
 {
-	//TODO: Implement
+	this->context = _context; // keep the caller-supplied OpenCL context pointer
+	this->queue = _queue;     // keep the caller-supplied command queue pointer
+	this->program = _program; // keep the caller-supplied program pointer
 }
 
 void Large::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
diff --git a/Linear/Linear.cpp b/Linear/Linear.cpp
index 3e16a0d..b63e4e0 100644
--- a/Linear/Linear.cpp
+++ b/Linear/Linear.cpp
@@ -60,7 +60,8 @@ int main()
 	capi();
 	//cppapi();
 	OpenCLHandler handler("../kernels/linear.cl");
-	Jacobi j(MVType::SimpleMV);
+	Jacobi j(MVType::ReduceMV);
+	//Jacobi j(MVType::SimpleMV);
 	handler.run_test(&j);
     return 0;
 }
diff --git a/Linear/Reduce.cpp b/Linear/Reduce.cpp
index 248428f..3d9a014 100644
--- a/Linear/Reduce.cpp
+++ b/Linear/Reduce.cpp
@@ -1,11 +1,56 @@
 #include "LinearTests.h"
+#include <Common.h>
 
-Reduce::Reduce(cl::Context* context, cl::CommandQueue* queue, cl::Program* program)
+Reduce::Reduce(cl::Context* _context, cl::CommandQueue* _queue, cl::Program* _program)
 {
-	//TODO: Implement
+	this->context = _context; // keep the caller-supplied OpenCL context pointer
+	this->queue = _queue;     // keep the caller-supplied command queue pointer
+	this->program = _program; // keep the caller-supplied program pointer
 }
 
 void Reduce::dewIt(int n, int m, float* y, const float* A, const float* x, const float* b)
 {
-	//TODO: Implement
-}
+	// Runs the "reduceMV" kernel: uploads A (n*m floats), x (m), y (n) and b (n), then reads n floats back into y.
+	// The kernel takes get_group_id(0) as the row and get_local_id(0) as the column, hence n groups of m items below.
+	cl_int err = CL_SUCCESS;
+	cl::Event _event;
+	cl::Kernel kernel = cl::Kernel(*program, "reduceMV", &err);
+	CheckCLError(err);
+
+	cl::Buffer ABuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n * m, NULL, &err);	// matrix, n*m floats
+	CheckCLError(err);
+	err = queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n * m, A);	// blocking upload
+	CheckCLError(err);
+	cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err);	// input vector, m floats
+	CheckCLError(err);
+	err = queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x);
+	CheckCLError(err);
+	cl::Buffer YBuffer(*context, CL_MEM_WRITE_ONLY, sizeof(float) * n, NULL, &err);	// result vector, n floats
+	CheckCLError(err);
+	err = queue->enqueueWriteBuffer(YBuffer, true, 0, sizeof(float) * n, y);	// entries the kernel leaves untouched keep the caller's values
+	CheckCLError(err);
+	cl::Buffer BBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * n, NULL, &err);	// offset vector, n floats
+	CheckCLError(err);
+	err = queue->enqueueWriteBuffer(BBuffer, true, 0, sizeof(float) * n, b);
+	CheckCLError(err);
+
+	kernel.setArg(0, n);	//	void reduceMV(const int n,
+	kernel.setArg(1, m);	//		const int M,
+	kernel.setArg(2, YBuffer);	//		__global float* y,
+	kernel.setArg(3, ABuffer);	//		__global float* A,
+	kernel.setArg(4, XBuffer);	//		__global float* x,
+	kernel.setArg(5, BBuffer);	//		__global float* b,
+	kernel.setArg(6, sizeof(float) * m, NULL);	//		__local float* Q) - one float per work-item of a group, i.e. one row
+
+	err = queue->enqueueNDRangeKernel(kernel,
+		cl::NullRange,				// no global index offset
+		cl::NDRange(static_cast<size_t>(n) * m),	// n work-groups in total: one per matrix row
+		cl::NDRange(m),				// m work-items per group: one per column of that row
+		NULL,						// no events to wait for
+		&_event);
+	CheckCLError(err);	// e.g. fails here when m exceeds the device's maximum work-group size
+	_event.wait();
+
+	err = queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(float) * n, y);	// blocking download of the result
+	CheckCLError(err);
+}
\ No newline at end of file
diff --git a/Linear/Simple.cpp b/Linear/Simple.cpp
index 7c3eeaf..899d139 100644
--- a/Linear/Simple.cpp
+++ b/Linear/Simple.cpp
@@ -20,7 +20,7 @@ void Simple::dewIt(int n, int m, float* y, const float* A, const float* x, const
 	CheckCLError(err);
 	queue->enqueueWriteBuffer(ABuffer, true, 0, sizeof(float) * n *m, A);
 
-	cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err); // Ot kell kiirogatni
+	cl::Buffer XBuffer(*context, CL_MEM_READ_ONLY, sizeof(float) * m, NULL, &err);
 	CheckCLError(err);
 	queue->enqueueWriteBuffer(XBuffer, true, 0, sizeof(float) * m, x);
 
@@ -47,5 +47,5 @@ void Simple::dewIt(int n, int m, float* y, const float* A, const float* x, const
 		&_event);
 	_event.wait();
 
-	queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(int) * n, y);
+	queue->enqueueReadBuffer(YBuffer, true, 0, sizeof(float) * n, y);
 }
diff --git a/kernels/linear.cl b/kernels/linear.cl
index c3cf73b..203786b 100644
--- a/kernels/linear.cl
+++ b/kernels/linear.cl
@@ -11,7 +11,7 @@
 // END IF
 __kernel
 void simpleMV(const int n, const int m, __global float* y, __global float* A, __global float* x, __global float* b){
-	int i = get_local_id(0);
+	int i = get_global_id(0);
 
 	if (i < n) {
 		float yi = b[i];
@@ -38,8 +38,26 @@ void simpleMV(const int n, const int m, __global float* y, __global float* A, __
 //
 __kernel
 void reduceMV(const int n, const int M, __global float* y, __global float* A, __global float* x, __global float* b, __local float* Q){
-	int i = get_group_id(0);
-	int j = get_local_id(0);
+	int i = get_group_id(0); // Matrix row handled by this work-group
+	int j = get_local_id(0); // Column within the row handled by this work-item
+
+	// MAP: Q[j] = A[i][j] * x[j]; out-of-range items store 0 (no early return, every item must reach the barriers)
+	Q[j] = (i < n && j < M) ? A[i * M + j] * x[j] : 0.0f;
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	// REDUCE: repeatedly add the upper part of Q onto the lower part until Q[0] holds the sum of the row.
+	// The half is rounded up so the middle element survives when the current length is odd (any work-group size works).
+	for (size_t len = get_local_size(0); len > 1; len = (len + 1) / 2) {
+		size_t half = (len + 1) / 2;
+		if (j + half < len) {
+			Q[j] = Q[j] + Q[j + half];
+		}
+		barrier(CLK_LOCAL_MEM_FENCE);
+	}
+
+	if (j == 0 && i < n) { // one writer per row: y[i] = dot(A[i], x) + b[i]
+		y[i] = Q[0] + b[i];
+	}
 }
 
 // TODO: General solution for matrix-vector multiplication, every thread processes a chunk of the dot product and visits multiple rows of the result