From 1a889dbab562943708f5b42ef74b80443ab0829f Mon Sep 17 00:00:00 2001 From: lriha <lubomir.riha@vsb.cz> Date: Wed, 10 Apr 2019 21:56:38 +0200 Subject: [PATCH] minor changes - wip --- CSparse/Demo/Makefile | 10 ++++---- CSparse/Demo/cs_demo.c | 2 +- CSparse/Lib/Makefile | 2 +- CSparse/Source/cs_cholsol.c | 48 +++++++++++++++++-------------------- 4 files changed, 29 insertions(+), 33 deletions(-) diff --git a/CSparse/Demo/Makefile b/CSparse/Demo/Makefile index 55010dd..d3c84d7 100644 --- a/CSparse/Demo/Makefile +++ b/CSparse/Demo/Makefile @@ -8,11 +8,11 @@ CS = $(LDFLAGS) ../Lib/libcsparse.a $(LDLIBS) all: lib cs_demo1 cs_demo2 cs_demo3 # - ./cs_demo2 < ../Matrix/t1 # - ./cs_demo2 < ../Matrix/FEM-2S - - ./cs_demo2 < ../Matrix/bcsstk26-v2 - - ./cs_demo2 < ../Matrix/bcsstk25 - - ./cs_demo2 < ../Matrix/bcsstk17 - - ./cs_demo2 < ../Matrix/ship_001 -# - ./cs_demo2 < ../Matrix/bcsstk01 +# - ./cs_demo2 < ../Matrix/bcsstk26-v2 +# - ./cs_demo2 < ../Matrix/bcsstk25 +# - ./cs_demo2 < ../Matrix/bcsstk17 +# - ./cs_demo2 < ../Matrix/ship_001 + - ./cs_demo2 < ../Matrix/bcsstk01 # - ./cs_demo2 < ../Matrix/crystm02 # - ./cs_demo2 < ../Matrix/bcsstk26-v2 diff --git a/CSparse/Demo/cs_demo.c b/CSparse/Demo/cs_demo.c index 95d225e..abcd210 100644 --- a/CSparse/Demo/cs_demo.c +++ b/CSparse/Demo/cs_demo.c @@ -83,7 +83,7 @@ problem *get_problem (FILE *f, double tol) problem *Prob ; Prob = cs_calloc (1, sizeof (problem)) ; if (!Prob) return (NULL) ; - T = cs_load2 (f) ; /* load triplet matrix T from a file */ + T = cs_load (f) ; /* load triplet matrix T from a file */ Prob->A = A = cs_compress (T) ; /* A = compressed-column form of T */ cs_spfree (T) ; /* clear T */ if (!cs_dupl (A)) return (free_problem (Prob)) ; /* sum up duplicates */ diff --git a/CSparse/Lib/Makefile b/CSparse/Lib/Makefile index aa4afc8..1a847e5 100644 --- a/CSparse/Lib/Makefile +++ b/CSparse/Lib/Makefile @@ -16,7 +16,7 @@ LIBRARY = libcsparse CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O -CC = nvcc -dc -g -G -O0 --cudart shared # -DDEBUG +CC = nvcc -dc -g -G -O0 --cudart shared -DDEBUG I = -I../Include RANLIB = ranlib ARCHIVE = $(AR) $(ARFLAGS) diff --git a/CSparse/Source/cs_cholsol.c b/CSparse/Source/cs_cholsol.c index c77f2ef..59bd4ee 100644 --- a/CSparse/Source/cs_cholsol.c +++ b/CSparse/Source/cs_cholsol.c @@ -52,7 +52,7 @@ csi cs_cholsol (csi order, const cs *A, double *b) int blocks = total_num_of_LSC_perGPU; blocks = 1; - n_rhs = 1; + n_rhs = 2; printf(" - K to LSC size ratio is : %f - cube size is %f \n", n_rhs_ratio, cube_size); @@ -97,7 +97,9 @@ csi cs_cholsol (csi order, const cs *A, double *b) } // END - Copy RHS vector to multiple columns -/* +// #define FULL_MEM +#ifdef FULL_MEM + int GPU_mem = 0; int num_of_arrays = blocks; @@ -168,9 +170,10 @@ csi cs_cholsol (csi order, const cs *A, double *b) cs_lsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks); cs_ltsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks); -*/ +#else + // *** Vesion with // Copy Chol. factor and RHSs from CPU to GPU int *d_Lp; int *d_Li; @@ -193,32 +196,24 @@ csi cs_cholsol (csi order, const cs *A, double *b) // Transfer data back to CPU if needed double *x_gpu; x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ; - // cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost ); + cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost ); + +#endif + // CPU code verification - lsolve for multiple RSH cs_lsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */ cs_ltsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */ - - // int i; - // int r; - // int errors = 0; - // for (i=0; i<n; i++) { - // for (r = 0; r < n_rhs; r++) { - // if ( fabs(x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r]) > 1e-12 ) { - // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); - // errors++; - // } - // } - // } - // printf("\n\n %d different elements between CPU and GPU. \n\n", errors); - - + // *** Debug - check with per element output #ifdef DEBUG - // int i; - // int r; - cs_lsolve (N->L, x) ; /* x = L\x */ + { + int i; + int r; + + cs_lsolve (N->L, x) ; /* x = L\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */ + for (i=0; i<n; i++) { printf("cpu: %f\t gpu:\t", x[i]); for (r = 0; r < n_rhs; r++) { @@ -226,14 +221,15 @@ csi cs_cholsol (csi order, const cs *A, double *b) printf("OK\t"); else printf("Er\t"); - // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); + // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); printf("%f %f \t", x_gpu[i*n_rhs + r], rhs_t[i*n_rhs + r] ); } printf("\n"); } printf("\n"); + } #else - cs_lsolve (N->L, x) ; /* x = L\x */ + cs_lsolve (N->L, x) ; /* x = L\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */ #endif @@ -345,8 +341,8 @@ csi cs_cholsol_single (csi order, const cs *A, double *b) // *** Debug - check with per element output #ifdef DEBUG - // int i; - // int r; + int i; + int r; cs_lsolve (N->L, x) ; /* x = L\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */ for (i=0; i<n; i++) { -- GitLab