#include "cs.h"
///* solve Lx=b where x and b are dense.  x=b on input, solution on output. */
///*
//csi cs_lsolve (const cs *L, double *x)
//{
//   csi p, j, n, *Lp, *Li ;
//    double *Lx ;
//    if (!CS_CSC (L) || !x) return (0) ;                     /* check inputs */
//    n = L->n ; Lp = L->p ; Li = L->i ; Lx = L->x ;
//    for (j = 0 ; j < n ; j++)
//    {
//        x [j] /= Lx [Lp [j]] ;
//        for (p = Lp [j]+1 ; p < Lp [j+1] ; p++)
//        {
//            x [Li [p]] -= Lx [p] * x [j] ;
//        }
//    }
//    return (1) ;
//}
//*/

/* solve Lx=b where x and b are dense.  x=b on input, solution on output. */

//csi cs_lsolve_gpu (const cs *L, double *x)
// typedef struct cs_sparse    /* matrix in compressed-column or triplet form */
// {
//     csi nzmax ;     /* maximum number of entries */
//     csi m ;         /* number of rows */
//     csi n ;         /* number of columns */
//     csi *p ;        /* column pointers (size n+1) or col indices (size nzmax) */
//     csi *i ;        /* row indices, size nzmax */
//     double *x ;     /* numerical values, size nzmax */
//     csi nz ;        /* # of entries in triplet matrix, -1 for compressed-col */
// } cs ;





__global__
void cs_lsolve_gpu (int n, const int *Lp, const int *Li, const double *Lx, double *x)
{
    int p, j;
    
    // double *Lx ;
    // n = L->n ; 
    // Lp = L->p ; 
    // Li = L->i ; 
    // Lx = L->x ;
      
    for (j = 0 ; j < n ; j++)
    {
        x [j] /= Lx [Lp [j]] ;
        for (p = Lp [j]+1 ; p < Lp [j+1] ; p++)
        {
            x [Li [p]] -= Lx [p] * x [j] ;
            printf("GPU: %f", x [Li [p]]);
        }
    }
    
    //return (1);
}




extern "C" {
    void cs_lsolve_acc (int n, const int *Lp, const int *Li, const double *Lx, double *x)
    {
      cs_lsolve_gpu <<<1, 1>>> (n, Lp, Li, Lx, x);
    }
}