Commit d184036e authored by Lubomir Riha
Browse files

ENH: 3D Thomas kernel works

parent 31bcdda3
......@@ -788,7 +788,9 @@ __global__ void thomas_kernel3D_X2(int const m, FT const alpha, FT const alpha_2
template<class FT>
__global__ void thomas_kernel3D_XT(int const m, FT const alpha, FT const alpha_23, FT const * const __restrict__ dev_c_prime, FT const * const __restrict__ b, FT * const __restrict__ x) {
#define TILE_SIZE 2
//#define TILE_SIZE 2
//TODO: Should be #define
int TILE_SIZE = blockDim.x;
__shared__ FT sh_b[TILE_SIZE][TILE_SIZE+1];
__shared__ FT sh_x[TILE_SIZE][TILE_SIZE+1];
......@@ -797,7 +799,7 @@ __global__ void thomas_kernel3D_XT(int const m, FT const alpha, FT const alpha_2
int tid_l = threadIdx.x;
int bid = blockIdx.x;
//int tid = blockIdx.x * blockDim.x + threadIdx.x;
//int tid = blockIdx.x * + threadIdx.x;
// Base of the address used to read data from global memory into tiles in shared memory
// - this is reused multiple times
......@@ -1199,7 +1201,7 @@ public:
}
std::cout << std::endl;
thomas_kernel3D_XT<FT><<<8, 2>>>(m, alpha, alpha_23, c_prime, bbb, xx); //bb);
thomas_kernel3D_XT<FT><<<4, 4>>>(m, alpha, alpha_23, c_prime, bbb, xx); //bb);
cublas_transpose2(cublas_handle, m*m, m, xx, bbb);
cublas_transpose2(cublas_handle, m*m, m, bbb, xx);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment