diff --git a/CSparse/Demo/cs_demo2.c b/CSparse/Demo/cs_demo2.c
index fb91eb5676937ba261daefcbe8d931b4c01004fe..0be84cb1c1a708e73dc4515a9b2e1d83bc61d3ad 100644
--- a/CSparse/Demo/cs_demo2.c
+++ b/CSparse/Demo/cs_demo2.c
@@ -51,10 +51,8 @@ int main (void)
     // demo2 (Prob) ;
     // free_problem (Prob) ;
     
-
     problem *Prob = get_problem (stdin, 1e-14) ;
 
-
     cs *A, *C ;
     double *b, *x, *resid,  t, tol ;
     csi k, m, n, ok, order, nb, ns, *r, *s, *rr, sprank ;
@@ -75,8 +73,7 @@ int main (void)
     {
         ns += ((r [k+1] == r [k]+1) && (s [k+1] == s [k]+1)) ;
     }
-    printf ("blocks: %g singletons: %g structural rank: %g\n",
-        (double) nb, (double) ns, (double) sprank) ;
+    printf ("blocks: %g singletons: %g structural rank: %g\n", (double) nb, (double) ns, (double) sprank) ;
     cs_dfree (D) ;
 
     // for (order = 0 ; order <= 3 ; order += 3)   /* natural and amd(A'*A) */
@@ -102,7 +99,7 @@ int main (void)
     //     printf ("time: %8.2f ", toc (t)) ;
     //     print_resid (ok, C, x, b, resid) ;      /* print residual */
     // }
-    // if (!Prob->sym) return (1) ;
+    if (!Prob->sym) return (1) ;
     for (order = 0 ; order <= 1 ; order++)      /* natural and amd(A+A') */
     {
         if (!order && m > 1000) continue ;
diff --git a/CSparse/Source/cs_cholsol.c b/CSparse/Source/cs_cholsol.c
index 69d6d6bc864060ea37d51da782aa94f188ff406f..c77f2ef01543ef235740609a81918c88043ae2e6 100644
--- a/CSparse/Source/cs_cholsol.c
+++ b/CSparse/Source/cs_cholsol.c
@@ -31,13 +31,7 @@ csi cs_cholsol_cpu (csi order, const cs *A, double *b)
 
 
 csi cs_cholsol (csi order, const cs *A, double *b)
-{
-    
-    printf("\n *** Running GPU version with - kernel 1 with transposed RHS - with coalesced memory access. \n");
-    double GPUmem_sizeGB = 32.0;
-    double n_rhs_ratio = 6.0;
-    int n_rhs; 
-    int blocks=1000;//0; // cca 30 GB
+{    
 
     double *x ;
     css *S ;
@@ -45,22 +39,29 @@ csi cs_cholsol (csi order, const cs *A, double *b)
     csi n, ok ;
     if (!CS_CSC (A) || !b) return (0) ;     /* check inputs */
     n = A->n ;
+ 
+    printf("\n--------------------------------------------------------------------------------------------------------------------\n");
+    printf("Running GPU version with - kernel 1 with transposed RHS - with coalesced memory access. \n");
+ 
+    double GPUmem_sizeGB        = 32.0; // GB
+    double cube_size            = cbrt((double)n/3.0);
+    double n_rhs_ratio          = (cube_size*cube_size*cube_size) / (cube_size*cube_size*cube_size - (cube_size-2.0)*(cube_size-2.0)*(cube_size-2.0) );  
+    int    n_rhs                = (int)((double)n / n_rhs_ratio); 
+    double LSCsize              = (double)n_rhs*n_rhs / 1024.0 / 1024.0 / 2.0 * sizeof(double);
+    int total_num_of_LSC_perGPU = GPUmem_sizeGB / LSCsize * 1024.0;
+    int    blocks               = total_num_of_LSC_perGPU;
+
+    blocks = 1; 
+    n_rhs  = 1; 
+
 
-    double cube_size = cbrt((double)n/3.0);
-    n_rhs_ratio = (cube_size*cube_size*cube_size) / (cube_size*cube_size*cube_size - (cube_size-2.0)*(cube_size-2.0)*(cube_size-2.0) );  
     printf(" - K to LSC size ratio is : %f - cube size is %f \n", n_rhs_ratio, cube_size);
-    n_rhs = (int)((double)n / n_rhs_ratio); 
-    // n_rhs = 1000;
     printf(" - LSC size         = %d x %d ( 1/2 for symmetric system) \n", n_rhs, n_rhs);
-    double LSCsize = (double)n_rhs*n_rhs / 1024.0 / 1024.0 / 2.0 * sizeof(double);
     printf(" - LSC size (symm.) = %f MB \n", LSCsize); 
     printf(" - number of RHS for this matrix is : %d \n", n_rhs );
-    int total_num_of_LSC_perGPU = GPUmem_sizeGB / LSCsize * 1024.0;
     printf(" - Total namber of LSCs to fit into %f GB RAM of GPU : %d \n", GPUmem_sizeGB, (int)total_num_of_LSC_perGPU );
-    blocks = total_num_of_LSC_perGPU;
     printf(" - Total problem size is : %d DOF \n", total_num_of_LSC_perGPU * n);
-
-
+    printf("\n");
 
     S = cs_schol (order, A) ;               /* ordering and symbolic analysis */
     N = cs_chol (A, S) ;                    /* numeric Cholesky factorization */