Skip to content
Snippets Groups Projects
Commit da01e2ea authored by Lubomir Riha's avatar Lubomir Riha
Browse files

wip

parent 1a889dba
No related branches found
No related tags found
No related merge requests found
......@@ -51,7 +51,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
int total_num_of_LSC_perGPU = GPUmem_sizeGB / LSCsize * 1024.0;
int blocks = total_num_of_LSC_perGPU;
blocks = 1;
blocks = 2
n_rhs = 2;
......@@ -97,9 +97,9 @@ csi cs_cholsol (csi order, const cs *A, double *b)
}
// END - Copy RHS vector to multiple columns
// #define FULL_MEM
#define FULL_MEM
#ifdef FULL_MEM
{
int GPU_mem = 0;
int num_of_arrays = blocks;
......@@ -134,6 +134,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
// allocate each device row-pointer, then copy host data to it
int i;
for(i = 0 ; i < num_of_arrays ; i++){
cudaMalloc(&h_array_Lp[i], ((N->L->n)+1) * sizeof(int));
......@@ -171,8 +172,18 @@ csi cs_cholsol (csi order, const cs *A, double *b)
cs_lsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks);
cs_ltsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks);
#else
double** x_gpu_array = (double**)malloc(num_of_arrays * sizeof(double*));
for(i = 0 ; i < num_of_arrays ; i++) {
x_gpu_array[i] = cs_malloc ( n_rhs * n, sizeof (double)) ;
cudaMemcpy(x_gpu_array[i], h_array_x [i], n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost );
}
double *x_gpu = x_gpu_array[1];
}
#else
{
// *** Version with
// Copy Chol. factor and RHSs from CPU to GPU
int *d_Lp;
......@@ -197,7 +208,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
double *x_gpu;
x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ;
cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost );
}
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment