Newer
Older
#include <vector>
#include <cmath>
#include <limits>
#ifdef MY_GPULIB_ROC
#include "mygpulib_roc.hpp"
#ifdef MY_GPULIB_CUDA
#include "mygpulib_cuda.hpp"
#endif
#ifdef MY_GPULIB_CUDALEG
#include "mygpulib_cudaleg.hpp"
#endif
#ifdef MY_METHOD_EXPLICIT
#include "dualop_explicit.hpp"
#ifdef MY_METHOD_IMPLICIT
#include "dualop_implicit.hpp"
#endif
#ifdef MY_METHOD_EXPLICITSC
#include "dualop_explicitsc.hpp"
#endif
#ifdef MY_GPULIB_ROC
#include "myaxpby_roc.hpp"
#endif
#ifdef MY_GPULIB_CUDA
#include "myaxpby_cuda.hpp"
#endif
#ifdef MY_GPULIB_CUDALEG
#include "myaxpby_cuda.hpp"
#endif
template<typename T, typename I>
void run_test(const std::vector<MatrixCSR<T,I>> & Kregs, const std::vector<MatrixCSR<T,I>> & Bs, const std::vector<MatrixDense<T,I>> & Fs_correct, int update_n_repeats, int apply_n_repeats, bool do_axpby, const char * magicstring_dualop, const char * magicstring_solver)
printf("Tool str: %s\n", gpulib_str);
printf("Method str: %s\n", method_str);
printf("Solver str: %s\n", solver_str);
printf("Magic string dualop: %s\n", magicstring_dualop);
printf("Magic string solver: %s\n", magicstring_solver);
I max_dofs_interface = std::max_element(Bs.begin(), Bs.end(), [](const MatrixCSR<T,I> & a, const MatrixCSR<T,I> & b) { return a.nrows < b.nrows; })->nrows;
I n_dofs_cluster = 10 * max_dofs_interface;
std::vector<std::vector<I>> domain_to_cluster_maps(Kregs.size());
for(size_t d = 0; d < domain_to_cluster_maps.size(); d++)
{
I n_dofs_interface = Bs[d].nrows;
domain_to_cluster_maps[d].resize(n_dofs_interface);
for(I i = 0; i < n_dofs_interface; i++) domain_to_cluster_maps[d][i] = rand() % n_dofs_cluster;
std::vector<direct_sparse_solver<T,I>> Ksolvers(n_domains);
for(size_t d = 0; d < n_domains; d++) Ksolvers[d].init(magicstring_solver);
my_dual_operator_cluster<T,I> dualop;
dualop.init(mpi_rank, magicstring_dualop);
myaxpby<T> cacheclearing_axpby;
if(do_axpby) cacheclearing_axpby.init(mpi_rank, (size_t)2 << 30);
for(size_t d = 0; d < n_domains; d++) Ksolvers[d].set_matrix(Kregs[d]);
dualop.set(Ksolvers, Bs, domain_to_cluster_maps, n_dofs_cluster);
{
for(int r = 0; r < update_n_repeats; r++)
{
printf("Update %d started\n", r);
if(r != 0) for(size_t d = 0; d < n_domains; d++) Ksolvers[d].set_matrix(Kregs[d]);
T max_allowed_rel_error = std::sqrt(std::numeric_limits<T>::epsilon());
printf("Checking Fs for correctness, eps = %.3e\n", max_allowed_rel_error);
std::vector<MatrixDense<T,I>> Fs;
dualop.get_Fs_copies(Fs);
// {
// print_matrix(Fs[0], "F0");
// print_matrix(Fs_correct[0], "F0 correct");
// printf("\n");
// }
int n_bad = 0;
for(size_t m = 0; m < Kregs.size(); m++)
{
bool is_ok = check_matrices_upper_equal(Fs_correct[m % Fs_correct.size()], Fs[m], max_allowed_rel_error, m < 16, n_bad < 16);
if(!is_ok) n_bad++;
}
if(n_bad == 0) printf("All F matrices seem OK\n");
else printf("Total %d bad F matrices\n", n_bad);
}
MatrixDense<T,I> x_cluster(n_dofs_cluster, 1, -1, true);
MatrixDense<T,I> y_cluster(n_dofs_cluster, 1, -1, true);
for(I i = 0; i < x_cluster.nrows; i++) x_cluster.vals[i] = 2.0 * rand() / RAND_MAX - 1.0;
for(I i = 0; i < y_cluster.nrows; i++) y_cluster.vals[i] = 0.0;
for(int r = 0; r < apply_n_repeats; r++)
{
T norm = std::accumulate(x_cluster.vals, x_cluster.vals + n_dofs_cluster, T{0}, [](T a, T b) { return std::sqrt(a*a+b*b); });
for(I i = 0; i < x_cluster.nrows; i++) x_cluster.vals[i] /= norm;
for(I i = 0; i < x_cluster.nrows; i++) x_cluster.vals[i] *= 4.0 * rand() / RAND_MAX - 2.0;
}
T checksum = 0;
for(I i = 0; i < x_cluster.nrows; i++)
{
checksum += x_cluster.vals[i] * std::sin(10.0 * i + 2.0);
}
printf("Apply checksum: %.15e\n", checksum);
}
printf("Apply finished\n");
printf("Destroy started\n");
dualop.destroy();
Ksolvers.clear();
cacheclearing_axpby.destroy();