-
Jakub Homola authoredJakub Homola authored
mygpulib.h 9.30 KiB
#pragma once
#include <vector>
#include "matrices.hpp"
extern const char * gpulib_str;
namespace mygpulib
{
namespace mgm
{
struct device;
struct queue;
struct Ad;
struct Ah;
static device get_device_by_mpi(int mpi_rank);
static void set_device(const device & d);
static void queue_create(queue & q, device & d);
static void queue_destroy(queue & q);
static void queue_async_barrier(const std::vector<queue> & waitfor, const std::vector<queue> & waitin);
static void queue_wait(queue & q);
static void device_wait(device & d);
static size_t get_device_memory_capacity(device & d);
static void * memalloc_device(device & d, size_t num_bytes);
static void memfree_device(device & d, void * ptr);
static void memalloc_device_max(device & d, void * & memory, size_t & memory_size_B, size_t max_needed);
static void * memalloc_hostpinned(device & d, size_t num_bytes);
static void memfree_hostpinned(device & d, void * ptr);
template<typename C>
static void submit_host_function(queue & q, const C & c);
template<typename T, typename I>
static void copy_submit_h2d(queue & q, T * dst, const T * src, I num_elements);
template<typename T, typename I>
static void copy_submit_d2h(queue & q, T * dst, const T * src, I num_elements);
template<typename T, typename I, typename Ao, typename Ai>
static void copy_matrix_submit_d2h(queue & q, MatrixDense<T,I,Ao> & output, const MatrixDense<T,I,Ai> & input);
template<typename T, typename I, typename Ao, typename Ai>
static void copy_matrix_submit_h2d(queue & q, MatrixDense<T,I,Ao> & output, const MatrixDense<T,I,Ai> & input);
template<typename T, typename I, typename Ao, typename Ai>
static void copy_matrix_submit_d2h(queue & q, MatrixCSR<T,I,Ao> & output, const MatrixCSR<T,I,Ai> & input, bool copy_pattern = true, bool copy_vals = true);
template<typename T, typename I, typename Ao, typename Ai>
static void copy_matrix_submit_h2d(queue & q, MatrixCSR<T,I,Ao> & output, const MatrixCSR<T,I,Ai> & input, bool copy_pattern = true, bool copy_vals = true);
template<typename T, typename I>
static void memset_submit(queue & q, T * ptr, I num_bytes, char val);
template<typename T, typename I, typename A>
static void print_matrix(queue & q, const MatrixDense<T,I,A> & Md, const char * name = "unnamed")
{
static_assert(A::is_data_device_accessible, "matrix data must be device accessible");
MatrixDense<T,I,Ah> Mh(Md.nrows, Md.ncols, Md.ld, true);
copy_matrix_submit_d2h(q, Mh, Md);
queue_wait(q);
::print_matrix(Mh, name);
}
template<typename T, typename I, typename A>
static void print_matrix(queue & q, const MatrixCSR<T,I,A> & Md, const char * name = "unnamed")
{
static_assert(A::is_data_device_accessible, "matrix data must be device accessible");
MatrixCSR<T,I,Ah> Mh(Md.nrows, Md.ncols, Md.nvals, true);
copy_matrix_submit_d2h(q, Mh, Md);
queue_wait(q);
::print_matrix(Mh, name);
}
char change_operation(char op)
{
if(op == 'N') return 'T';
if(op == 'T') return 'N';
return '_';
}
char change_order(char order)
{
if(order == 'R') return 'C';
if(order == 'C') return 'R';
return '_';
}
char change_fill(char fill)
{
if(fill == 'L') return 'U';
if(fill == 'U') return 'L';
return '_';
}
}
namespace dense
{
struct handle;
static void handle_create(handle & h, mgm::queue & q);
static void handle_destroy(handle & h);
template<typename F>
static void buffer_collect_size(handle & h, size_t & buffersize, const F & f);
static void buffer_set(handle & h, void * ptr, size_t size);
static void buffer_unset(handle & h);
template<typename T, typename I>
static void trsv(handle & h, char mat_symmetry, char transpose, I n, I ld, T * matrix, T * rhs_sol);
template<typename T, typename I>
static void trsm(handle & h, char order, char side, char fill, char transpose, I nrows_X, I ncols_X, T * A, I ld_A, T * rhs_sol, I ld_X);
template<typename T, typename I>
static void syrk(handle & h, char out_fill, char transpose, I n, I k, T alpha, T * A, I ld_A, T beta, T * C, I ld_C);
template<typename T, typename I>
static void symv(handle & h, char fill, I n, T alpha, T * A, I ld_A, T * vec_in, T * vec_out);
}
namespace sparse
{
struct handle;
struct descr_matrix_csr;
struct descr_matrix_dense;
struct descr_vector_dense;
struct descr_sparse_trsv;
struct descr_sparse_trsm;
static void handle_create(handle & h, mgm::queue & q);
static void handle_destroy(handle & h);
template<typename T, typename I>
static void descr_matrix_csr_create(descr_matrix_csr & descr, I nrows, I ncols, I nnz, char symmetry);
template<typename T, typename I, typename A>
static void descr_matrix_sparse_link_data(descr_matrix_csr & descr, MatrixCSR<T,I,A> & matrix);
static void descr_matrix_csr_destroy(descr_matrix_csr & descr);
template<typename T, typename I>
static void descr_matrix_dense_create(descr_matrix_dense & descr, I nrows, I ncols, I ld, char order);
template<typename T, typename I, typename A>
static void descr_matrix_dense_link_data(descr_matrix_dense & descr, MatrixDense<T,I,A> & matrix);
static void descr_matrix_dense_destroy(descr_matrix_dense & descr);
template<typename T, typename I>
static void descr_vector_dense_create(descr_vector_dense & descr, I size);
template<typename T, typename I, typename A>
static void descr_vector_dense_link_data(descr_vector_dense & descr, MatrixDense<T,I,A> & matrix, I colidx = 0);
static void descr_vector_dense_destroy(descr_vector_dense & descr);
static void descr_sparse_trsv_create(descr_sparse_trsv & descr);
static void descr_sparse_trsv_destroy(descr_sparse_trsv & descr);
static void descr_sparse_trsm_create(descr_sparse_trsm & descr);
static void descr_sparse_trsm_destroy(descr_sparse_trsm & descr);
template<typename T, typename I>
static void sparse_to_dense(handle & h, char transpose, descr_matrix_csr & sparse, descr_matrix_dense & dense, size_t & buffersize, void * buffer, char stage);
template<typename T, typename I>
static void trsv(handle & h, char transpose, descr_matrix_csr & matrix, descr_vector_dense & rhs, descr_vector_dense & sol, descr_sparse_trsv & descr_trsv, size_t & buffersize, void * buffer, char stage);
template<typename T, typename I>
static void trsm(handle & h, char transpose_mat, char transpose_rhs, descr_matrix_csr & matrix, descr_matrix_dense & rhs, descr_matrix_dense & sol, descr_sparse_trsm & descr_trsm, size_t & buffersize, void * buffer, char stage);
template<typename T, typename I>
void mv(handle & h, char transpose, descr_matrix_csr & A, descr_vector_dense & x, descr_vector_dense & y, size_t & buffersize, void * buffer, char stage);
template<typename T, typename I>
static void mm(handle & h, char transpose_A, char transpose_B, descr_matrix_csr & A, descr_matrix_dense & B, descr_matrix_dense & C, size_t & buffersize, void * buffer, char stage);
}
namespace dnsolver
{
struct handle;
struct params;
struct info;
static void handle_create(handle & h, mgm::queue & q);
static void handle_destroy(handle & h);
static void params_create(params & p);
static void params_destroy(params & p);
static void info_create(info & i);
static void info_destroy(info & i);
template<typename T, typename I>
static void potrf(handle & h, params & p, char fill, char order, I n, T * matrix, I ld, size_t & buffersize_device, size_t & buffersize_host, void * buffer_device, void * buffer_host, info & i, char stage);
}
namespace kernels
{
template<typename T, typename I>
static void DCmap_scatter(mgm::queue & q, MatrixDense<T*,I,mgm::Ad> & domain_vector_pointers, const MatrixDense<I,I,mgm::Ad> & n_dofs_interfaces, const MatrixDense<T,I,mgm::Ad> & cluster_vector, const MatrixDense<I*,I,mgm::Ad> & D2Cs);
template<typename T, typename I>
static void DCmap_gather(mgm::queue & q, const MatrixDense<T*,I,mgm::Ad> & domain_vector_pointers, const MatrixDense<I,I,mgm::Ad> & n_dofs_interfaces, MatrixDense<T,I,mgm::Ad> & cluster_vector, const MatrixDense<I*,I,mgm::Ad> & D2Cs);
template<typename T, typename I>
void submatrix(mgm::queue & q, MatrixDense<T,I,mgm::Ad> & output, const MatrixCSR<T,I,mgm::Ad> & input, I rowstart, I rowend, I colstart, I colend);
template<typename T, typename I, typename Ao, typename Ai>
void copy_scale(mgm::queue & q, MatrixDense<T,I,Ao> & output, const MatrixDense<T,I,Ai> & input, T scalar);
}
}