Skip to content
Snippets Groups Projects
mygpulib.h 9.30 KiB
#pragma once

#include <vector>

#include "matrices.hpp"

extern const char * gpulib_str;



namespace mygpulib
{
    namespace mgm
    {
        struct device;

        struct queue;

        struct Ad;
        struct Ah;

        static device get_device_by_mpi(int mpi_rank);

        static void set_device(const device & d);

        static void queue_create(queue & q, device & d);
        static void queue_destroy(queue & q);

        static void queue_async_barrier(const std::vector<queue> & waitfor, const std::vector<queue> & waitin);

        static void queue_wait(queue & q);

        static void device_wait(device & d);

        static size_t get_device_memory_capacity(device & d);

        static void * memalloc_device(device & d, size_t num_bytes);

        static void memfree_device(device & d, void * ptr);
        
        static void memalloc_device_max(device & d, void * & memory, size_t & memory_size_B, size_t max_needed);

        static void * memalloc_hostpinned(device & d, size_t num_bytes);

        static void memfree_hostpinned(device & d, void * ptr);

        template<typename C>
        static void submit_host_function(queue & q, const C & c);

        template<typename T, typename I>
        static void copy_submit_h2d(queue & q, T * dst, const T * src, I num_elements);
        template<typename T, typename I>
        static void copy_submit_d2h(queue & q, T * dst, const T * src, I num_elements);

        template<typename T, typename I, typename Ao, typename Ai>
        static void copy_matrix_submit_d2h(queue & q, MatrixDense<T,I,Ao> & output, const MatrixDense<T,I,Ai> & input);
        template<typename T, typename I, typename Ao, typename Ai>
        static void copy_matrix_submit_h2d(queue & q, MatrixDense<T,I,Ao> & output, const MatrixDense<T,I,Ai> & input);
        
        template<typename T, typename I, typename Ao, typename Ai>
        static void copy_matrix_submit_d2h(queue & q, MatrixCSR<T,I,Ao> & output, const MatrixCSR<T,I,Ai> & input, bool copy_pattern = true, bool copy_vals = true);
        template<typename T, typename I, typename Ao, typename Ai>
        static void copy_matrix_submit_h2d(queue & q, MatrixCSR<T,I,Ao> & output, const MatrixCSR<T,I,Ai> & input, bool copy_pattern = true, bool copy_vals = true);

        template<typename T, typename I>
        static void memset_submit(queue & q, T * ptr, I num_bytes, char val);

        template<typename T, typename I, typename A>
        static void print_matrix(queue & q, const MatrixDense<T,I,A> & Md, const char * name = "unnamed")
        {
            static_assert(A::is_data_device_accessible, "matrix data must be device accessible");
            MatrixDense<T,I,Ah> Mh(Md.nrows, Md.ncols, Md.ld, true);
            copy_matrix_submit_d2h(q, Mh, Md);
            queue_wait(q);
            ::print_matrix(Mh, name);
        }

        template<typename T, typename I, typename A>
        static void print_matrix(queue & q, const MatrixCSR<T,I,A> & Md, const char * name = "unnamed")
        {
            static_assert(A::is_data_device_accessible, "matrix data must be device accessible");
            MatrixCSR<T,I,Ah> Mh(Md.nrows, Md.ncols, Md.nvals, true);
            copy_matrix_submit_d2h(q, Mh, Md);
            queue_wait(q);
            ::print_matrix(Mh, name);
        }

        char change_operation(char op)
        {
            if(op == 'N') return 'T';
            if(op == 'T') return 'N';
            return '_';
        }

        char change_order(char order)
        {
            if(order == 'R') return 'C';
            if(order == 'C') return 'R';
            return '_';
        }

        char change_fill(char fill)
        {
            if(fill == 'L') return 'U';
            if(fill == 'U') return 'L';
            return '_';
        }
    }





    namespace dense
    {
        struct handle;

        static void handle_create(handle & h, mgm::queue & q);
        static void handle_destroy(handle & h);

        template<typename F>
        static void buffer_collect_size(handle & h, size_t & buffersize, const F & f);

        static void buffer_set(handle & h, void * ptr, size_t size);

        static void buffer_unset(handle & h);

        template<typename T, typename I>
        static void trsv(handle & h, char mat_symmetry, char transpose, I n, I ld, T * matrix, T * rhs_sol);

        template<typename T, typename I>
        static void trsm(handle & h, char order, char side, char fill, char transpose, I nrows_X, I ncols_X, T * A, I ld_A, T * rhs_sol, I ld_X);

        template<typename T, typename I>
        static void syrk(handle & h, char out_fill, char transpose, I n, I k, T alpha, T * A, I ld_A, T beta, T * C, I ld_C);
        
        template<typename T, typename I>
        static void symv(handle & h, char fill, I n, T alpha, T * A, I ld_A, T * vec_in, T * vec_out);
    }




    namespace sparse
    {
        struct handle;

        struct descr_matrix_csr;
        struct descr_matrix_dense;
        struct descr_vector_dense;

        struct descr_sparse_trsv;
        struct descr_sparse_trsm;

        static void handle_create(handle & h, mgm::queue & q);
        static void handle_destroy(handle & h);

        template<typename T, typename I>
        static void descr_matrix_csr_create(descr_matrix_csr & descr, I nrows, I ncols, I nnz, char symmetry);
        template<typename T, typename I, typename A>
        static void descr_matrix_sparse_link_data(descr_matrix_csr & descr, MatrixCSR<T,I,A> & matrix);
        static void descr_matrix_csr_destroy(descr_matrix_csr & descr);

        template<typename T, typename I>
        static void descr_matrix_dense_create(descr_matrix_dense & descr, I nrows, I ncols, I ld, char order);
        template<typename T, typename I, typename A>
        static void descr_matrix_dense_link_data(descr_matrix_dense & descr, MatrixDense<T,I,A> & matrix);
        static void descr_matrix_dense_destroy(descr_matrix_dense & descr);

        template<typename T, typename I>
        static void descr_vector_dense_create(descr_vector_dense & descr, I size);
        template<typename T, typename I, typename A>
        static void descr_vector_dense_link_data(descr_vector_dense & descr, MatrixDense<T,I,A> & matrix, I colidx = 0);
        static void descr_vector_dense_destroy(descr_vector_dense & descr);
        
        static void descr_sparse_trsv_create(descr_sparse_trsv & descr);
        static void descr_sparse_trsv_destroy(descr_sparse_trsv & descr);

        static void descr_sparse_trsm_create(descr_sparse_trsm & descr);
        static void descr_sparse_trsm_destroy(descr_sparse_trsm & descr);

        template<typename T, typename I>
        static void sparse_to_dense(handle & h, char transpose, descr_matrix_csr & sparse, descr_matrix_dense & dense, size_t & buffersize, void * buffer, char stage);

        template<typename T, typename I>
        static void trsv(handle & h, char transpose, descr_matrix_csr & matrix, descr_vector_dense & rhs, descr_vector_dense & sol, descr_sparse_trsv & descr_trsv, size_t & buffersize, void * buffer, char stage);

        template<typename T, typename I>
        static void trsm(handle & h, char transpose_mat, char transpose_rhs, descr_matrix_csr & matrix, descr_matrix_dense & rhs, descr_matrix_dense & sol, descr_sparse_trsm & descr_trsm, size_t & buffersize, void * buffer, char stage);

        template<typename T, typename I>
        void mv(handle & h, char transpose, descr_matrix_csr & A, descr_vector_dense & x, descr_vector_dense & y, size_t & buffersize, void * buffer, char stage);

        template<typename T, typename I>
        static void mm(handle & h, char transpose_A, char transpose_B, descr_matrix_csr & A, descr_matrix_dense & B, descr_matrix_dense & C, size_t & buffersize, void * buffer, char stage);
    }





    namespace dnsolver
    {
        struct handle;

        struct params;

        struct info;
        static void handle_create(handle & h, mgm::queue & q);
        static void handle_destroy(handle & h);

        static void params_create(params & p);
        static void params_destroy(params & p);

        static void info_create(info & i);
        static void info_destroy(info & i);

        template<typename T, typename I>
        static void potrf(handle & h, params & p, char fill, char order, I n, T * matrix, I ld, size_t & buffersize_device, size_t & buffersize_host, void * buffer_device, void * buffer_host, info & i, char stage);
    }





    namespace kernels
    {
        template<typename T, typename I>
        static void DCmap_scatter(mgm::queue & q, MatrixDense<T*,I,mgm::Ad> & domain_vector_pointers, const MatrixDense<I,I,mgm::Ad> & n_dofs_interfaces, const MatrixDense<T,I,mgm::Ad> & cluster_vector, const MatrixDense<I*,I,mgm::Ad> & D2Cs);

        template<typename T, typename I>
        static void DCmap_gather(mgm::queue & q, const MatrixDense<T*,I,mgm::Ad> & domain_vector_pointers, const MatrixDense<I,I,mgm::Ad> & n_dofs_interfaces, MatrixDense<T,I,mgm::Ad> & cluster_vector, const MatrixDense<I*,I,mgm::Ad> & D2Cs);

        template<typename T, typename I>
        void submatrix(mgm::queue & q, MatrixDense<T,I,mgm::Ad> & output, const MatrixCSR<T,I,mgm::Ad> & input, I rowstart, I rowend, I colstart, I colend);

        template<typename T, typename I, typename Ao, typename Ai>
        void copy_scale(mgm::queue & q, MatrixDense<T,I,Ao> & output, const MatrixDense<T,I,Ai> & input, T scalar);
    }
}