Skip to content
Snippets Groups Projects
Commit da01e2ea authored by Lubomir Riha's avatar Lubomir Riha
Browse files

wip

parent 1a889dba
No related branches found
No related tags found
No related merge requests found
......@@ -51,7 +51,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
int total_num_of_LSC_perGPU = GPUmem_sizeGB / LSCsize * 1024.0;
int blocks = total_num_of_LSC_perGPU;
blocks = 1;
blocks = 2
n_rhs = 2;
......@@ -97,9 +97,9 @@ csi cs_cholsol (csi order, const cs *A, double *b)
}
// END - Copy RHS vector to multiple columns
// #define FULL_MEM
#define FULL_MEM
#ifdef FULL_MEM
{
int GPU_mem = 0;
int num_of_arrays = blocks;
......@@ -134,6 +134,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
// allocate each device row-pointer, then copy host data to it
int i;
for(i = 0 ; i < num_of_arrays ; i++){
cudaMalloc(&h_array_Lp[i], ((N->L->n)+1) * sizeof(int));
......@@ -171,8 +172,18 @@ csi cs_cholsol (csi order, const cs *A, double *b)
cs_lsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks);
cs_ltsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks);
#else
double** x_gpu_array = (double**)malloc(num_of_arrays * sizeof(double*));
for(i = 0 ; i < num_of_arrays ; i++) {
x_gpu_array[i] = cs_malloc ( n_rhs * n, sizeof (double)) ;
cudaMemcpy(x_gpu_array[i], h_array_x [i], n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost );
}
double *x_gpu = x_gpu_array[1];
}
#else
{
// *** Version with
// Copy Chol. factor and RHSs from CPU to GPU
int *d_Lp;
......@@ -197,7 +208,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
double *x_gpu;
x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ;
cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost );
}
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment