#include "cs.h"                 /* CSparse */
#include <cuda_runtime.h>       /* CUDA runtime API, used by the GPU variant below */

/* x=A\b where A is symmetric positive definite; b overwritten with solution */
csi cs_cholsol (csi order, const cs *A, double *b)
{
    double *x ;
    css *S ;
    csn *N ;
    csi n, ok ;
    if (!CS_CSC (A) || !b) return (0) ;     /* check inputs */
    n = A->n ;
    S = cs_schol (order, A) ;               /* ordering and symbolic analysis */
    N = cs_chol (A, S) ;                    /* numeric Cholesky factorization */
    x = (double *) cs_malloc (n, sizeof (double)) ;     /* get workspace */
    ok = (S && N && x) ;
    if (ok)
    {
        cs_ipvec (S->pinv, b, x, n) ;       /* x = P*b */
        cs_lsolve (N->L, x) ;               /* x = L\x */
        cs_ltsolve (N->L, x) ;              /* x = L'\x */
        cs_pvec (S->pinv, x, b, n) ;        /* b = P'*x */
    }
    cs_free (x) ;
    cs_sfree (S) ;
    cs_nfree (N) ;
    return (ok) ;
}
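
The modified routine below launches a cs_lsolve_gpu kernel that the original
listing never defines. Here is a minimal sketch of what it could look like,
assuming one thread per right-hand side and the interleaved layout built in
the routine (entry (row j, rhs r) stored at x [j*n_rhs + r]); each thread runs
the same sequential forward solve as CSparse's cs_lsolve:

/* sketch: one thread per RHS; Lp/Li/Lx hold the Cholesky factor L in
   compressed-column form, with the diagonal entry first in each column */
__global__ void cs_lsolve_gpu (int n, const int *Lp, const int *Li,
    const double *Lx, double *x, int n_rhs)
{
    int r = blockIdx.x * blockDim.x + threadIdx.x ;     /* this thread's RHS */
    int j, p ;
    if (r >= n_rhs) return ;
    for (j = 0 ; j < n ; j++)
    {
        x [j*n_rhs + r] /= Lx [Lp [j]] ;                /* divide by L(j,j) */
        for (p = Lp [j] + 1 ; p < Lp [j+1] ; p++)
        {
            x [Li [p]*n_rhs + r] -= Lx [p] * x [j*n_rhs + r] ;
        }
    }
}

The interleaving is what makes this layout GPU-friendly: at every step the
n_rhs threads of a warp touch adjacent elements of x, so the accesses coalesce.
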
/* GPU variant: the forward solve is offloaded to the device and applied to
   n_rhs identical right-hand sides; the function name and signature here are
   assumed, since the original listing omitted them */
csi cs_cholsol_gpu (csi order, const cs *A, double *b, int n_rhs)
{
    double *x ;
    css *S ;
    csn *N ;
    csi n, ok ;
    if (!CS_CSC (A) || !b) return (0) ;     /* check inputs */
    n = A->n ;
    S = cs_schol (order, A) ;               /* ordering and symbolic analysis */
    N = cs_chol (A, S) ;                    /* numeric Cholesky factorization */
    x = (double *) cs_malloc (n, sizeof (double)) ;     /* get workspace */
    ok = (S && N && x) ;
    if (ok)
    {
        cs_ipvec (S->pinv, b, x, n) ;       /* x = P*b */
        // /* --- primary CSparse routines and data structures ------------ */
        // typedef struct cs_sparse    /* matrix in compressed-column or triplet form */
        // {
        //     csi nzmax ;     /* maximum number of entries */
        //     csi m ;         /* number of rows */
        //     csi n ;         /* number of columns */
        //     csi *p ;        /* column pointers (size n+1) or col indices (size nzmax) */
        //     csi *i ;        /* row indices, size nzmax */
        //     double *x ;     /* numerical values, size nzmax */
        //     csi nz ;        /* # of entries in triplet matrix, -1 for compressed-col */
        // } cs ;
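        // for reference (added example, not part of cs.h): column j of a
        // compressed-column matrix spans p [j] .. p [j+1]-1, so it is
        // scanned as
        //     for (p = A->p [j] ; p < A->p [j+1] ; p++)
        //         /* row A->i [p], value A->x [p] */ ;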
        double *rhs_t = (double *) cs_malloc (n_rhs * n, sizeof (double)) ;
        int ri, ni ;
        for (ni = 0 ; ni < n ; ni++)        /* replicate x into n_rhs interleaved RHS */
        {
            for (ri = 0 ; ri < n_rhs ; ri++)
            {
                rhs_t [ni*n_rhs + ri] = x [ni] ;    /* (row ni, rhs ri) */
            }
        }
        int *d_Lp ;         /* device copies of L and the RHS block; copying */
        int *d_Li ;         /* the csi arrays into int buffers assumes       */
        double *d_Lx ;      /* CSparse was built with csi == int (no CS_LONG) */
        double *d_x ;
        //cudaSetDevice(1);
        cudaMalloc ((void **) &d_Lp, (N->L->n + 1) * sizeof (int)) ;
        cudaMalloc ((void **) &d_Li, N->L->nzmax * sizeof (int)) ;
        cudaMalloc ((void **) &d_Lx, N->L->nzmax * sizeof (double)) ;
        cudaMalloc ((void **) &d_x , n * n_rhs * sizeof (double)) ;
        cudaMemcpy (d_Lp, N->L->p, (N->L->n + 1) * sizeof (int), cudaMemcpyHostToDevice) ;
        cudaMemcpy (d_Li, N->L->i, N->L->nzmax * sizeof (int), cudaMemcpyHostToDevice) ;
        cudaMemcpy (d_Lx, N->L->x, N->L->nzmax * sizeof (double), cudaMemcpyHostToDevice) ;
        // copying each RHS separately would give a blocked layout (RHS rr at
        // offset rr*n) rather than the interleaved one built above:
        // for (rr = 0 ; rr < n_rhs ; rr++) {
        //     cudaMemcpy (d_x + rr * n, x, n * sizeof (double), cudaMemcpyHostToDevice) ;
        // }
        cudaMemcpy (d_x, rhs_t, n * n_rhs * sizeof (double), cudaMemcpyHostToDevice) ;  /* all n_rhs right-hand sides */
        int blockSize = 128 ;                       /* one thread per RHS */
        int gridSize = (n_rhs + blockSize - 1) / blockSize ;
        cs_lsolve_gpu <<<gridSize, blockSize>>> (n, d_Lp, d_Li, d_Lx, d_x, n_rhs) ;
        // void cs_lsolve_acc (int n, const int *Lp, const int *Li, const double *Lx, double *x, int n_rhs, int threads, int blocks)
        double *x_gpu = (double *) cs_malloc (n_rhs * n, sizeof (double)) ;
        cudaMemcpy (x_gpu, d_x, n * n_rhs * sizeof (double), cudaMemcpyDeviceToHost) ;
        cs_lsolve (N->L, x) ;               /* x = L\x (CPU reference) */
        int i, r ;
        for (i = 0 ; i < n ; i++)           /* compare CPU and GPU forward solves */
        {
            printf ("cpu: %f\t gpu:\t", x [i]) ;
            for (r = 0 ; r < n_rhs ; r++)
            {
                printf ("%f\t", x_gpu [i*n_rhs + r]) ;
            }
            printf ("\n") ;
        }
        cs_ltsolve (N->L, x) ;              /* x = L'\x */
        cs_pvec (S->pinv, x, b, n) ;        /* b = P'*x */
        cs_free (rhs_t) ;                   /* free host and device workspace */
        cs_free (x_gpu) ;
        cudaFree (d_Lp) ; cudaFree (d_Li) ; cudaFree (d_Lx) ; cudaFree (d_x) ;
    }
    cs_free (x) ;
    cs_sfree (S) ;
    cs_nfree (N) ;
    return (ok) ;
}
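
For completeness, a hypothetical driver (the 2-by-2 system, the AMD ordering,
and n_rhs = 4 are illustrative, not from the original post) that builds a
small SPD matrix in triplet form, compresses it, and calls the GPU variant:

int main (void)
{
    cs *T = cs_spalloc (2, 2, 4, 1, 1) ;    /* empty triplet matrix */
    cs_entry (T, 0, 0, 2.0) ;               /* A = [2 1 ; 1 2], SPD */
    cs_entry (T, 0, 1, 1.0) ;
    cs_entry (T, 1, 0, 1.0) ;
    cs_entry (T, 1, 1, 2.0) ;
    cs *A = cs_compress (T) ;               /* triplet -> compressed-column */
    cs_spfree (T) ;
    double b [2] = {1.0, 1.0} ;
    csi ok = cs_cholsol_gpu (1, A, b, 4) ;  /* order 1 = amd(A+A') */
    printf ("ok = %ld, x = [%f %f]\n", (long) ok, b [0], b [1]) ;
    cs_spfree (A) ;
    return (0) ;
}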