Skip to content
Snippets Groups Projects
Commit 1a889dba authored by Lubomir Riha
Browse files

minor changes - wip

parent ed93db35
No related branches found
No related tags found
No related merge requests found
...@@ -8,11 +8,11 @@ CS = $(LDFLAGS) ../Lib/libcsparse.a $(LDLIBS) ...@@ -8,11 +8,11 @@ CS = $(LDFLAGS) ../Lib/libcsparse.a $(LDLIBS)
all: lib cs_demo1 cs_demo2 cs_demo3 all: lib cs_demo1 cs_demo2 cs_demo3
# - ./cs_demo2 < ../Matrix/t1 # - ./cs_demo2 < ../Matrix/t1
# - ./cs_demo2 < ../Matrix/FEM-2S # - ./cs_demo2 < ../Matrix/FEM-2S
- ./cs_demo2 < ../Matrix/bcsstk26-v2 # - ./cs_demo2 < ../Matrix/bcsstk26-v2
- ./cs_demo2 < ../Matrix/bcsstk25 # - ./cs_demo2 < ../Matrix/bcsstk25
- ./cs_demo2 < ../Matrix/bcsstk17 # - ./cs_demo2 < ../Matrix/bcsstk17
- ./cs_demo2 < ../Matrix/ship_001 # - ./cs_demo2 < ../Matrix/ship_001
# - ./cs_demo2 < ../Matrix/bcsstk01 - ./cs_demo2 < ../Matrix/bcsstk01
# - ./cs_demo2 < ../Matrix/crystm02 # - ./cs_demo2 < ../Matrix/crystm02
# - ./cs_demo2 < ../Matrix/bcsstk26-v2 # - ./cs_demo2 < ../Matrix/bcsstk26-v2
......
...@@ -83,7 +83,7 @@ problem *get_problem (FILE *f, double tol) ...@@ -83,7 +83,7 @@ problem *get_problem (FILE *f, double tol)
problem *Prob ; problem *Prob ;
Prob = cs_calloc (1, sizeof (problem)) ; Prob = cs_calloc (1, sizeof (problem)) ;
if (!Prob) return (NULL) ; if (!Prob) return (NULL) ;
T = cs_load2 (f) ; /* load triplet matrix T from a file */ T = cs_load (f) ; /* load triplet matrix T from a file */
Prob->A = A = cs_compress (T) ; /* A = compressed-column form of T */ Prob->A = A = cs_compress (T) ; /* A = compressed-column form of T */
cs_spfree (T) ; /* clear T */ cs_spfree (T) ; /* clear T */
if (!cs_dupl (A)) return (free_problem (Prob)) ; /* sum up duplicates */ if (!cs_dupl (A)) return (free_problem (Prob)) ; /* sum up duplicates */
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
LIBRARY = libcsparse LIBRARY = libcsparse
CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O
CC = nvcc -dc -g -G -O0 --cudart shared # -DDEBUG CC = nvcc -dc -g -G -O0 --cudart shared -DDEBUG
I = -I../Include I = -I../Include
RANLIB = ranlib RANLIB = ranlib
ARCHIVE = $(AR) $(ARFLAGS) ARCHIVE = $(AR) $(ARFLAGS)
......
...@@ -52,7 +52,7 @@ csi cs_cholsol (csi order, const cs *A, double *b) ...@@ -52,7 +52,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
int blocks = total_num_of_LSC_perGPU; int blocks = total_num_of_LSC_perGPU;
blocks = 1; blocks = 1;
n_rhs = 1; n_rhs = 2;
printf(" - K to LSC size ratio is : %f - cube size is %f \n", n_rhs_ratio, cube_size); printf(" - K to LSC size ratio is : %f - cube size is %f \n", n_rhs_ratio, cube_size);
...@@ -97,7 +97,9 @@ csi cs_cholsol (csi order, const cs *A, double *b) ...@@ -97,7 +97,9 @@ csi cs_cholsol (csi order, const cs *A, double *b)
} }
// END - Copy RHS vector to multiple columns // END - Copy RHS vector to multiple columns
/* // #define FULL_MEM
#ifdef FULL_MEM
int GPU_mem = 0; int GPU_mem = 0;
int num_of_arrays = blocks; int num_of_arrays = blocks;
...@@ -168,9 +170,10 @@ csi cs_cholsol (csi order, const cs *A, double *b) ...@@ -168,9 +170,10 @@ csi cs_cholsol (csi order, const cs *A, double *b)
cs_lsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks); cs_lsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks);
cs_ltsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks); cs_ltsolve_gpu_trans_multi (n, d_array_Lp, d_array_Li, d_array_Lx, d_array_x, n_rhs, n_rhs, blocks);
*/
#else
// *** Vesion with
// Copy Chol. factor and RHSs from CPU to GPU // Copy Chol. factor and RHSs from CPU to GPU
int *d_Lp; int *d_Lp;
int *d_Li; int *d_Li;
...@@ -193,32 +196,24 @@ csi cs_cholsol (csi order, const cs *A, double *b) ...@@ -193,32 +196,24 @@ csi cs_cholsol (csi order, const cs *A, double *b)
// Transfer data back to CPU if needed // Transfer data back to CPU if needed
double *x_gpu; double *x_gpu;
x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ; x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ;
// cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost ); cudaMemcpy(x_gpu, d_x, n * n_rhs * sizeof(double), cudaMemcpyDeviceToHost );
#endif
// CPU code verification - lsolve for multiple RSH // CPU code verification - lsolve for multiple RSH
cs_lsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */ cs_lsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */
cs_ltsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */ cs_ltsolve_mrhs (N->L, rhs_t, n_rhs); /* X = L\X */
// int i;
// int r;
// int errors = 0;
// for (i=0; i<n; i++) {
// for (r = 0; r < n_rhs; r++) {
// if ( fabs(x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r]) > 1e-12 ) {
// printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] );
// errors++;
// }
// }
// }
// printf("\n\n %d different elements between CPU and GPU. \n\n", errors);
// *** Debug - check with per element output // *** Debug - check with per element output
#ifdef DEBUG #ifdef DEBUG
// int i; {
// int r; int i;
cs_lsolve (N->L, x) ; /* x = L\x */ int r;
cs_lsolve (N->L, x) ; /* x = L\x */
cs_ltsolve (N->L, x) ; /* x = L'\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
printf("cpu: %f\t gpu:\t", x[i]); printf("cpu: %f\t gpu:\t", x[i]);
for (r = 0; r < n_rhs; r++) { for (r = 0; r < n_rhs; r++) {
...@@ -226,14 +221,15 @@ csi cs_cholsol (csi order, const cs *A, double *b) ...@@ -226,14 +221,15 @@ csi cs_cholsol (csi order, const cs *A, double *b)
printf("OK\t"); printf("OK\t");
else else
printf("Er\t"); printf("Er\t");
// printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] ); // printf("%f\t", x_gpu[i*n_rhs + r] - rhs_t[i*n_rhs + r] );
printf("%f %f \t", x_gpu[i*n_rhs + r], rhs_t[i*n_rhs + r] ); printf("%f %f \t", x_gpu[i*n_rhs + r], rhs_t[i*n_rhs + r] );
} }
printf("\n"); printf("\n");
} }
printf("\n"); printf("\n");
}
#else #else
cs_lsolve (N->L, x) ; /* x = L\x */ cs_lsolve (N->L, x) ; /* x = L\x */
cs_ltsolve (N->L, x) ; /* x = L'\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */
#endif #endif
...@@ -345,8 +341,8 @@ csi cs_cholsol_single (csi order, const cs *A, double *b) ...@@ -345,8 +341,8 @@ csi cs_cholsol_single (csi order, const cs *A, double *b)
// *** Debug - check with per element output // *** Debug - check with per element output
#ifdef DEBUG #ifdef DEBUG
// int i; int i;
// int r; int r;
cs_lsolve (N->L, x) ; /* x = L\x */ cs_lsolve (N->L, x) ; /* x = L\x */
cs_ltsolve (N->L, x) ; /* x = L'\x */ cs_ltsolve (N->L, x) ; /* x = L'\x */
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment