Skip to content
Snippets Groups Projects
Commit dc72563b authored by Lubomir Riha's avatar Lubomir Riha
Browse files

working version with transposed rhs matrix

parent 368dc87c
Branches
No related tags found
No related merge requests found
......@@ -6,9 +6,10 @@ LDLIBS += -lm --cudart shared
CS = $(LDFLAGS) ../Lib/libcsparse.a $(LDLIBS)
all: lib cs_demo1 cs_demo2 cs_demo3
- ./cs_demo2 < ../Matrix/t1
- ./cs_demo2 < ../Matrix/ash219
# - ./cs_demo2 < ../Matrix/t1
# - ./cs_demo2 < ../Matrix/ash219
- ./cs_demo2 < ../Matrix/bcsstk01
lib:
( cd ../Lib ; $(MAKE) )
......
......@@ -170,7 +170,7 @@ csi demo2 (problem *Prob)
{
if (!order && m > 1000) continue ;
printf ("Chol \n") ;
//print_order (order) ;
print_order (order) ;
rhs (x, b, m) ; /* compute right-hand side */
t = tic () ;
ok = cs_cholsol (order, C, x) ; /* solve Ax=b with Cholesky */
......
......@@ -57,7 +57,7 @@ csi cs_cholsol (csi order, const cs *A, double *b)
// csi nz ; /* # of entries in triplet matrix, -1 for compressed-col */
// } cs ;
int n_rhs = 7;
int n_rhs = 4;
double *rhs_t;
rhs_t = cs_malloc ( n_rhs * n, sizeof (double)) ;
......@@ -65,11 +65,16 @@ csi cs_cholsol (csi order, const cs *A, double *b)
int ri;
int ni;
for (ni = 0; ni < n; ni++) {
printf("rhs[%d] %f - ",ni,x[ni]);
for (ri = 0; ri < n_rhs; ri++) {
rhs_t[ni*n_rhs+ri] = x[ni];
printf(" ri %d addr: %d %f - ",ri,ni*n_rhs+ri,rhs_t[ni*n_rhs+ri]);
}
printf("\n");
}
int *d_Lp;
int *d_Li;
double *d_Lx;
......@@ -92,12 +97,13 @@ csi cs_cholsol (csi order, const cs *A, double *b)
// cudaMemcpy( d_x + rr * n , x , n * sizeof(double) , cudaMemcpyHostToDevice) ;
// }
cudaMemcpy( d_x , rhs_t , n * sizeof(double) , cudaMemcpyHostToDevice) ;
cudaMemcpy( d_x , rhs_t , n * n_rhs * sizeof(double) , cudaMemcpyHostToDevice) ;
// cs_lsolve_gpu <<<gridSize, blockSize>>> (n, d_Lp, d_Li, d_Lx, d_x, n_rhs);
// void cs_lsolve_acc (int n, const int *Lp, const int *Lissssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss, const double *Lx, double *x, int n_rhs, int threads, int blocks)
// void cs_lsolve_acc (int n, const int *Lp, const int *Li, const double *Lx, double *x, int n_rhs, int threads, int blocks)
cs_lsolve_acc_trans (n, d_Lp, d_Li, d_Lx, d_x, n_rhs, n_rhs, 1);
//cs_lsolve_acc (n, d_Lp, d_Li, d_Lx, d_x, n_rhs, n_rhs, 1);
double *x_gpu;
x_gpu = cs_malloc ( n_rhs * n, sizeof (double)) ;
......@@ -106,17 +112,32 @@ csi cs_cholsol (csi order, const cs *A, double *b)
cs_lsolve (N->L, x) ; /* x = L\x */
// int i;
// int r;
// for (i=0; i<n; i++) {
// printf("cpu: %f\t gpu:\t", x[i]);
// for (r = 0; r < n_rhs; r++) {
// printf("%f\t", x_gpu[n*r + i] -x[i] );
// }
// printf("\n");
// }
// printf("\n");
int i;
int r;
for (i=0; i<n; i++) {
printf("cpu: %f\t gpu:\t", x[i]);
for (r = 0; r < n_rhs; r++) {
printf("%f\t", x_gpu[i*n_rhs + r] );
printf("%f\t", x_gpu[i*n_rhs + r] - x[i] );
}
printf("\n");
}
printf("\n");
exit(0);
cs_ltsolve (N->L, x) ; /* x = L'\x */
cs_pvec (S->pinv, x, b, n) ; /* b = P'*x */
}
......
......@@ -91,19 +91,44 @@ void cs_lsolve_gpu_trans (int n, const int *Lp, const int *Li, const double *Lx,
// Li = L->i ;
// Lx = L->x ;
int rhs_offset = tid;;
// csi cs_lsolve (const cs *L, double *x)
// {
// csi p, j, n, *Lp, *Li ;
// double *Lx ;
// if (!CS_CSC (L) || !x) return (0) ; /* check inputs */
// n = L->n ; Lp = L->p ; Li = L->i ; Lx = L->x ;
// for (j = 0 ; j < n ; j++)
// {
// x [j] /= Lx [Lp [j]] ;
// for (p = Lp [j]+1 ; p < Lp [j+1] ; p++)
// {
// x [Li [p]] -= Lx [p] * x [j] ;
// }
// }
// return (1) ;
// }
int rhs_offset = tid;
for (j = 0 ; j < n ; j++)
{
x [rhs_offset + j * n_rhs] = x [rhs_offset + j * n_rhs] / Lx [Lp [j]] ;
int addr = j * n_rhs + rhs_offset;
x [addr] = x [addr] / Lx [Lp [j]] ;
for (p = Lp [j]+1 ; p < Lp [j+1] ; p++)
{
x [rhs_offset + Li[p] * n_rhs] = x [rhs_offset + Li[p] * n_rhs] - Lx [p] * x [rhs_offset + j * n_rhs] ;
x [Li[p] * n_rhs + rhs_offset] = x [Li[p] * n_rhs + rhs_offset] - Lx [p] * x [addr] ;
//printf("GPU; tid=%d Li[p]=%d addr=%d x=%f\n", tid, Li [p], rhs_offset + Li [p]*n_rhs, x [rhs_offset + Li [p]*n_rhs]);
printf("GPU; tid=%d Li[p]=%d addr=%d x=%f\n", tid, Li [p], rhs_offset + Li [p]*n_rhs, x [rhs_offset + Li [p]*n_rhs]);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment