run_test.hpp

#pragma once


#include <vector>
#include <cmath>
#include <limits>


#include "solver.h"
#ifdef MY_SOLVER_CHOLMOD
#include "solver_cholmod.hpp"
#endif

#ifdef MY_GPULIB_ROC
#include "mygpulib_roc.hpp"
#endif
#ifdef MY_GPULIB_CUDA
#include "mygpulib_cuda.hpp"
#endif
#ifdef MY_GPULIB_CUDALEG
#include "mygpulib_cudaleg.hpp"
#endif

#include "dualop.h"
#ifdef MY_METHOD_EXPLICIT
#include "dualop_explicit.hpp"
#endif
#ifdef MY_METHOD_IMPLICIT
#include "dualop_implicit.hpp"
#endif
#ifdef MY_METHOD_EXPLICITSC
#include "dualop_explicitsc.hpp"
#endif

#ifdef MY_GPULIB_ROC
#include "myaxpby_roc.hpp"
#endif
#ifdef MY_GPULIB_CUDA
#include "myaxpby_cuda.hpp"
#endif
#ifdef MY_GPULIB_CUDALEG
#include "myaxpby_cuda.hpp"
#endif


template<typename T, typename I>
void run_test(const std::vector<MatrixCSR<T,I>> & Kregs, const std::vector<MatrixCSR<T,I>> & Bs, const std::vector<MatrixDense<T,I>> & Fs_correct, int update_n_repeats, int apply_n_repeats, bool do_axpby, const char * magicstring_dualop, const char * magicstring_solver)
{
    printf("Tool str: %s\n", gpulib_str);
    printf("Method str: %s\n", method_str);
    printf("Solver str: %s\n", solver_str);
    printf("Magic string dualop: %s\n", magicstring_dualop);
    printf("Magic string solver: %s\n", magicstring_solver);

    srand(789456);

    size_t n_domains = Kregs.size();
    I max_dofs_interface = std::max_element(Bs.begin(), Bs.end(), [](const MatrixCSR<T,I> & a, const MatrixCSR<T,I> & b) { return a.nrows < b.nrows; })->nrows;
    I n_dofs_cluster = 10 * max_dofs_interface;
    std::vector<std::vector<I>> domain_to_cluster_maps(Kregs.size());
    for(size_t d = 0; d < domain_to_cluster_maps.size(); d++)
    {
        I n_dofs_interface = Bs[d].nrows;
        domain_to_cluster_maps[d].resize(n_dofs_interface);
        for(I i = 0; i < n_dofs_interface; i++) domain_to_cluster_maps[d][i] = rand() % n_dofs_cluster;
    }

#if defined(MY_GPULIB_ROC)
    int mpi_rank = 6;
#elif defined(MY_GPULIB_CUDA) || defined(MY_GPULIB_CUDALEG)
    int mpi_rank = 2;
#endif
    std::vector<direct_sparse_solver<T,I>> Ksolvers(n_domains);
    for(size_t d = 0; d < n_domains; d++) Ksolvers[d].init(magicstring_solver);
    my_dual_operator_cluster<T,I> dualop;
    dualop.init(mpi_rank, magicstring_dualop);
    myaxpby<T> cacheclearing_axpby;
    if(do_axpby) cacheclearing_axpby.init(mpi_rank, (size_t)2 << 30);

    printf("\n");
    printf("Set started\n");
    for(size_t d = 0; d < n_domains; d++) Ksolvers[d].set_matrix(Kregs[d]);
    dualop.set(Ksolvers, Bs, domain_to_cluster_maps, n_dofs_cluster);
    printf("Set finished\n");

    printf("\n");
    printf("Update started\n");
    {
        for(int r = 0; r < update_n_repeats; r++)
        {
            printf("Update %d started\n", r);
            if(r != 0) for(size_t d = 0; d < n_domains; d++) Ksolvers[d].set_matrix(Kregs[d]);
            if(do_axpby) cacheclearing_axpby.perform();
            dualop.update(Ksolvers);
            printf("Update %d finished\n", r);
        }
    }
    printf("Update finished\n");

    if(dualop.provides_Fs())
    {
        T max_allowed_rel_error = std::sqrt(std::numeric_limits<T>::epsilon());
        printf("\n");
        printf("Checking Fs for correctness, eps = %.3e\n", max_allowed_rel_error);

        std::vector<MatrixDense<T,I>> Fs;
        dualop.get_Fs_copies(Fs);
        // {
        //     print_matrix(Fs[0], "F0");
        //     print_matrix(Fs_correct[0], "F0 correct");
        //     printf("\n");
        // }

        int n_bad = 0;
        for(size_t m = 0; m < Kregs.size(); m++)
        {
            bool is_ok = check_matrices_upper_equal(Fs_correct[m % Fs_correct.size()], Fs[m], max_allowed_rel_error, m < 16, n_bad < 16);
            if(!is_ok) n_bad++;
        }
        if(n_bad == 0) printf("All F matrices seem OK\n");
        else printf("Total %d bad F matrices\n", n_bad);
    }

    printf("\n");
    printf("Apply started\n");
    {
        srand(456123);
        MatrixDense<T,I> x_cluster(n_dofs_cluster, 1, -1, true);
        MatrixDense<T,I> y_cluster(n_dofs_cluster, 1, -1, true);
        for(I i = 0; i < x_cluster.nrows; i++) x_cluster.vals[i] = 2.0 * rand() / RAND_MAX - 1.0;
        for(I i = 0; i < y_cluster.nrows; i++) y_cluster.vals[i] = 0.0;

        for(int r = 0; r < apply_n_repeats; r++)
        {
            printf("Apply %d started\n", r);
            T norm = std::accumulate(x_cluster.vals, x_cluster.vals + n_dofs_cluster, T{0}, [](T a, T b) { return std::sqrt(a*a+b*b); });
            for(I i = 0; i < x_cluster.nrows; i++) x_cluster.vals[i] /= norm;
            for(I i = 0; i < x_cluster.nrows; i++) x_cluster.vals[i] *= 4.0 * rand() / RAND_MAX - 2.0;

            if(do_axpby) cacheclearing_axpby.perform();

            dualop.apply(y_cluster, x_cluster);

            std::swap(x_cluster, y_cluster);
            printf("Apply %d finished\n", r);
        }
        
        T checksum = 0;
        for(I i = 0; i < x_cluster.nrows; i++)
        {
            checksum += x_cluster.vals[i] * std::sin(10.0 * i + 2.0);
        }
        printf("Apply checksum: %.15e\n", checksum);
    }
    printf("Apply finished\n");

    printf("\n");
    printf("Destroy started\n");
    dualop.destroy();
    Ksolvers.clear();
    cacheclearing_axpby.destroy();
    printf("Destroy finished\n");
}