diff --git a/CSparse/Demo/Makefile b/CSparse/Demo/Makefile index 55010dd275b56ca446f4ccfce4a97af2abb4b150..d3c84d7d1662c7cfd462d66e0b69a04cf633155d 100644 --- a/CSparse/Demo/Makefile +++ b/CSparse/Demo/Makefile @@ -8,11 +8,11 @@ CS = $(LDFLAGS) ../Lib/libcsparse.a $(LDLIBS) all: lib cs_demo1 cs_demo2 cs_demo3 # - ./cs_demo2 < ../Matrix/t1 # - ./cs_demo2 < ../Matrix/FEM-2S - - ./cs_demo2 < ../Matrix/bcsstk26-v2 - - ./cs_demo2 < ../Matrix/bcsstk25 - - ./cs_demo2 < ../Matrix/bcsstk17 - - ./cs_demo2 < ../Matrix/ship_001 -# - ./cs_demo2 < ../Matrix/bcsstk01 +# - ./cs_demo2 < ../Matrix/bcsstk26-v2 +# - ./cs_demo2 < ../Matrix/bcsstk25 +# - ./cs_demo2 < ../Matrix/bcsstk17 +# - ./cs_demo2 < ../Matrix/ship_001 + - ./cs_demo2 < ../Matrix/bcsstk01 # - ./cs_demo2 < ../Matrix/crystm02 # - ./cs_demo2 < ../Matrix/bcsstk26-v2 diff --git a/CSparse/Demo/cs_demo.c b/CSparse/Demo/cs_demo.c index 95d225e4f91b4d6374f7498f695b9135bb62c82e..abcd210e80d8a134570093aa178130bc63f0eabf 100644 --- a/CSparse/Demo/cs_demo.c +++ b/CSparse/Demo/cs_demo.c @@ -83,7 +83,7 @@ problem *get_problem (FILE *f, double tol) problem *Prob ; Prob = cs_calloc (1, sizeof (problem)) ; if (!Prob) return (NULL) ; - T = cs_load2 (f) ; /* load triplet matrix T from a file */ + T = cs_load (f) ; /* load triplet matrix T from a file */ Prob->A = A = cs_compress (T) ; /* A = compressed-column form of T */ cs_spfree (T) ; /* clear T */ if (!cs_dupl (A)) return (free_problem (Prob)) ; /* sum up duplicates */ diff --git a/CSparse/Lib/Makefile b/CSparse/Lib/Makefile index aa4afc8a2081d19360fda4a491df34cc397a71f5..1a847e583803f632581215db86c531850bdd7b3f 100644 --- a/CSparse/Lib/Makefile +++ b/CSparse/Lib/Makefile @@ -16,7 +16,7 @@ LIBRARY = libcsparse CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O -CC = nvcc -dc -g -G -O0 --cudart shared # -DDEBUG +CC = nvcc -dc -g -G -O0 --cudart shared -DDEBUG I = -I../Include RANLIB = ranlib ARCHIVE = $(AR) $(ARFLAGS) diff --git a/CSparse/Source/cs_cholsol.c b/CSparse/Source/cs_cholsol.c index c77f2ef01543ef235740609a81918c88043ae2e6..59bd4ee51c02b1bc14277357bea34e8df2ed13b3 100644 --- a/CSparse/Source/cs_cholsol.c +++ b/CSparse/Source/cs_cholsol.c @@ -52,7 +52,7 @@ csi cs_cholsol (csi order, const cs *A, double *b) int blocks = total_num_of_LSC_perGPU; blocks = 1; - n_rhs = 1; + n_rhs = 2; printf(" - K to LSC size ratio is : %f - cube size is %f \n", n_rhs_ratio, cube_size); @@ -97,7 +97,9 @@ csi cs_cholsol (csi order, const cs *A, double *b) } // END - Copy RHS vector to multiple columns -/* +// #define FULL_MEM +#ifdef FULL_MEM + int GPU_mem = 0; int num_of_arrays = blocks; @@ -168,9 +170,10 @@ csi cs_cholsol (csi order, const cs *A, double *b) cs_lsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks); cs_ltsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks); -*/ +#else + // *** Vesion with // Copy Chol. factor and RHSs from CPU to GPU int *d_Lp; int *d_Li; @@ -193,32 +196,24 @@ csi cs_cholsol (csi order, const cs *A, double *b) // Transfer data back to CPU if needed double *x_gpu; x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ; - // cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost ); + cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost ); + +#endif + // CPU code verification - lsolve for multiple RSH cs_lsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */ cs_ltsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */ - - // int i; - // int r; - // int errors = 0; - // for (i=0; i<n; i++) { - // for (r = 0; r < n_rhs; r++) { - // if ( fabs(x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r]) > 1e-12 ) { - // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); - // errors++; - // } - // } - // } - // printf("\n\n %d different elements between CPU and GPU. \n\n", errors); - - + // *** Debug - check with per element output #ifdef DEBUG - // int i; - // int r; - cs_lsolve (N->L, x) ; /* x = L\x */ + { + int i; + int r; + + cs_lsolve (N->L, x) ; /* x = L\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */ + for (i=0; i<n; i++) { printf("cpu: %f\t gpu:\t", x[i]); for (r = 0; r < n_rhs; r++) { @@ -226,14 +221,15 @@ csi cs_cholsol (csi order, const cs *A, double *b) printf("OK\t"); else printf("Er\t"); - // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); + // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); printf("%f %f \t", x_gpu[i*n_rhs + r], rhs_t[i*n_rhs + r] ); } printf("\n"); } printf("\n"); + } #else - cs_lsolve (N->L, x) ; /* x = L\x */ + cs_lsolve (N->L, x) ; /* x = L\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */ #endif @@ -345,8 +341,8 @@ csi cs_cholsol_single (csi order, const cs *A, double *b) // *** Debug - check with per element output #ifdef DEBUG - // int i; - // int r; + int i; + int r; cs_lsolve (N->L, x) ; /* x = L\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */ for (i=0; i<n; i++) {