cs_cholsol.c

#include "cs.h"
#include "cuda_runtime.h"


/* x=A\b where A is symmetric positive definite; b overwritten with solution */
csi cs_cholsol_xxx (csi order, const cs *A, double *b)
{
    double *x ;
    css *S ;
    csn *N ;
    csi n, ok ;
    if (!CS_CSC (A) || !b) return (0) ;     /* check inputs */
    n = A->n ;
    S = cs_schol (order, A) ;               /* ordering and symbolic analysis */
    N = cs_chol (A, S) ;                    /* numeric Cholesky factorization */
    x = cs_malloc (n, sizeof (double)) ;    /* get workspace */
    ok = (S && N && x) ;
    if (ok)
    {
        cs_ipvec (S->pinv, b, x, n) ;   /* x = P*b */
        cs_lsolve (N->L, x) ;           /* x = L\x */
        cs_ltsolve (N->L, x) ;          /* x = L'\x */
        cs_pvec (S->pinv, x, b, n) ;    /* b = P'*x */
    }
    cs_free (x) ;
    cs_sfree (S) ;
    cs_nfree (N) ;
    return (ok) ;
}


csi cs_cholsol (csi order, const cs *A, double *b)
{
    double *x ;
    css *S ;
    csn *N ;
    csi n, ok ;
    if (!CS_CSC (A) || !b) return (0) ;     /* check inputs */
    n = A->n ;
    S = cs_schol (order, A) ;               /* ordering and symbolic analysis */
    N = cs_chol (A, S) ;                    /* numeric Cholesky factorization */
    x = cs_malloc (n, sizeof (double)) ;    /* get workspace */
    ok = (S && N && x) ;
    if (ok)
    {
        cs_ipvec (S->pinv, b, x, n) ;   /* x = P*b */
        
        // /* --- primary CSparse routines and data structures ------------------------- */
        // typedef struct cs_sparse    /* matrix in compressed-column or triplet form */
        // {
        //     csi nzmax ;     /* maximum number of entries */
        //     csi m ;         /* number of rows */
        //     csi n ;         /* number of columns */
        //     csi *p ;        /* column pointers (size n+1) or col indices (size nzmax) */
        //     csi *i ;        /* row indices, size nzmax */
        //     double *x ;     /* numerical values, size nzmax */
        //     csi nz ;        /* # of entries in triplet matrix, -1 for compressed-col */
        // } cs ;
        
        int    *d_Lp;
        int    *d_Li;
        double *d_Lx;
        double *d_x;

        cudaMalloc(&d_Lp, ((N->L->n)+1)  * sizeof(int)    );
        cudaMalloc(&d_Li,  (N->L->nzmax) * sizeof(int)    );
        cudaMalloc(&d_Lx,  (N->L->nzmax) * sizeof(double) );
        cudaMalloc(&d_x ,  n             * sizeof(double) );

        cudaMemcpy( d_Lp, N->L->p , ((N->L->n)+1)  * sizeof(int)    , cudaMemcpyHostToDevice) ;
        cudaMemcpy( d_Li, N->L->i ,  (N->L->nzmax) * sizeof(int)    , cudaMemcpyHostToDevice) ;
        cudaMemcpy( d_Lx, N->L->x ,  (N->L->nzmax) * sizeof(double) , cudaMemcpyHostToDevice) ;
        cudaMemcpy( d_x , x       ,  n             * sizeof(double) , cudaMemcpyHostToDevice) ;


        // csi cs_lsolve_gpu (int n, const int *Lp, const int *Li, const double *Lx, double *x)
        // cs_lsolve_gpu <<<gridSize, blockSize>>> (n, d_Lp, d_Li, d_Lx, d_x);
        cs_lsolve_acc (n, d_Lp, d_Li, d_Lx, d_x);

        double *x_gpu;
        x_gpu = cs_malloc (n, sizeof (double)) ;   
        cudaMemcpy(x_gpu, d_x, n * sizeof(double), cudaMemcpyDeviceToHost );


        cs_lsolve (N->L, x) ;           /* x = L\x */

        int i; 
        for (i=0; i<n; i++) {
            printf("cpu: %f - gpu: %f\n",x[i], x_gpu[i] );
        }
        printf("\n");

        cs_ltsolve (N->L, x) ;          /* x = L'\x */
        cs_pvec (S->pinv, x, b, n) ;    /* b = P'*x */
    }
    cs_free (x) ;
    cs_sfree (S) ;
    cs_nfree (N) ;
    return (ok) ;
}