net_test_pde_1.cpp

/**
 * Example solving the time dependent water flow 1D diffusion PDE:
 *
 * y_xx - y_t = 0, for (x, t) in [0, 1] x [0, 1]
 * y(0, t) = sin(t)
 * y(x, 0) = e^(-sqrt(0.5)x) * sin(-sqrt(0.5)x)
 *
 * -------------------------------------------
 * Analytical solution:
 * NN representation: sum over [a_i * (1 + e^(bi - x * w_ix - t * w_it))^(-1)]
 * -------------------------------------------
 * Optimal NN setting with biases (4 inner neurons)
 * Path   1. wx =      0.51954589, wt =     -0.48780445, b =      0.35656955, a =      1.69279158
 * Path   2. wx =     -1.24173503, wt =      1.13351300, b =      0.32528567, a =      1.69148458
 * Path   3. wx =      0.64754127, wt =      0.95758760, b =     -0.95852707, a =      2.77877453
 * Path   4. wx =      1.65439557, wt =     -0.31784248, b =     -1.81237586, a =     -3.96157108
 * @author Michal Kravčenko
 * @date 9.8.18
 */

#include <random>
#include <iostream>
#include <fstream>

#include "../../include/4neuro.h"
#include "../Solvers/DESolver.h"

//y(x, t) = ... ai * f(wxi * x + wti * t - bi)
double eval_approx_y(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
    for(size_t i = 0; i < n_inner_neurons; ++i){

        wxi = parameters[4 * i + 0];
        wti = parameters[4 * i + 1];
        ai  = parameters[4 * i + 2];
        bi  = parameters[4 * i + 3];

        ei = std::pow(E, bi - wxi * x - wti * t);
        ei1 = ei + 1.0;

        value += ai / (ei1);
    }
    return value;
}
//yt(x, t) = ... (ai * wti * e^(bi - wti * t - wxi * x))/(e^(bi - wti * t - wxi * x) + 1)^2
double eval_approx_yt(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
    double value= 0.0, wxi, wti, ai, bi, ei, ei1;

    for(size_t i = 0; i < n_inner_neurons; ++i){

        wxi = parameters[4 * i + 0];
        wti = parameters[4 * i + 1];
        ai  = parameters[4 * i + 2];
        bi  = parameters[4 * i + 3];

        ei = std::pow(E, bi - wxi * x - wti * t);
        ei1 = ei + 1.0;

        value += ai * wti * ei / (ei1 * ei1);
    }
    return value;
}
//yx(x, t) = ...  (ai * wxi * e^(bi - t * wti - wxi * x))/(e^(bi - t * wti - wxi * x) + 1)^2
double eval_approx_yx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
    double value= 0.0, wxi, wti, ai, bi, ei, ei1;

    for(size_t i = 0; i < n_inner_neurons; ++i){

        wxi = parameters[4 * i + 0];
        wti = parameters[4 * i + 1];
        ai  = parameters[4 * i + 2];
        bi  = parameters[4 * i + 3];

        ei = std::pow(E, bi - wxi * x - wti * t);
        ei1 = ei + 1.0;

        value += (ai * wxi * ei1)/(ei1 * ei1);
    }
    return value;
}
//yxx(x, t) = ...  (ai * wxi * e^(bi - t * wti - wxi * x))/(e^(bi - t * wti - wxi * x) + 1)^2
double eval_approx_yxx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
    for(size_t i = 0; i < n_inner_neurons; ++i){

        wxi = parameters[4 * i + 0];
        wti = parameters[4 * i + 1];
        ai  = parameters[4 * i + 2];
        bi  = parameters[4 * i + 3];

        ei = std::pow(E, bi - wxi * x - wti * t);
        ei1 = ei + 1.0;

        value += (2 * ai * wxi * wxi * ei * ei) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ei) / (ei1 * ei1);
    }
    return value;
}

double eval_approx_da_y(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, bi, ei, ei1;

    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return 1.0 / ei1;
}

double eval_approx_dwx_y(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return  (ai * x * ei)/(ei1 * ei1);
}

double eval_approx_dwt_y(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return  (ai * t * ei)/(ei1 * ei1);
}

double eval_approx_db_y(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, bi, ei, ai, ei1;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return -(ai * ei)/(ei1 * ei1);
}

double eval_approx_da_yt(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, bi, ei, ei1;

    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return (wti * ei)/(ei1 * ei1);
}

double eval_approx_dwx_yt(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return (2 * ai * wti * x * ei * ei)/(ei1 * ei1 * ei1) - (ai * wti * x * ei)/(ei1 * ei1);
}

double eval_approx_dwt_yt(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return  -(ai * t * wti * ei) / (ei1 * ei1) + (2 * ai * t * wti * ei * ei)/(ei1 * ei1 * ei1) + (ai * ei)/(ei1 * ei1);
}

double eval_approx_db_yt(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ei1 = ei + 1.0;

    return (ai * wti * ei) / (ei1 * ei1) - (2 * ai * wti * ei * ei) / (ei1 * ei1 * ei1);
}

double eval_approx_da_yxx(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1, ebp, eb, etx;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ebp= std::pow(E, bi + wxi * x + wti * t);
    eb = std::pow(E, bi);
    etx = std::pow(E, wxi * x + wti * t);
    ei1 = eb + etx;

    return -(wxi * wxi * ebp * (etx - eb))/(ei1 * ei1 * ei1);
}

double eval_approx_dwx_yxx(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1, ebp, eb, etx;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ebp= std::pow(E, bi + wxi * x + wti * t);
    eb = std::pow(E, bi);
    etx = std::pow(E, wxi * x + wti * t);
    ei1 = eb + etx;

    return (ai * wxi * wxi * x * ei) / ((ei + 1) * (ei + 1)) - (6 * ai * wxi * wxi * x * ei * ei) / ((ei + 1) * (ei + 1) * (ei + 1)) + (6 * ai * wxi *wxi * x * ei * ei * ei) / ((ei + 1) * (ei + 1) * (ei + 1) * (ei + 1)) - (2 * ai * wxi * ei) / ((ei + 1) * (ei + 1)) + (4 * ai * wxi * ei * ei)/((ei + 1) * (ei + 1) * (ei + 1));
}

double eval_approx_dwt_yxx(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1, ebp, eb, etx;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ebp= std::pow(E, bi + wxi * x + wti * t);
    eb = std::pow(E, bi);
    etx = std::pow(E, wxi * x + wti * t);
    ei1 = eb + etx;

    return (ai * t * wxi * wxi * ei) / ((ei + 1) * (ei + 1)) - (6 * ai * t * wxi * wxi * ei * ei) / ((ei + 1) * (ei + 1) * (ei + 1)) + (6 * ai * t * wxi * wxi * ei * ei * ei) / ((ei + 1) * (ei + 1) * (ei + 1) * (ei + 1));
}

double eval_approx_db_yxx(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
    double wxi, wti, ai, bi, ei, ei1, ebp, eb, etx;
    wxi = parameters[4 * neuron_idx + 0];
    wti = parameters[4 * neuron_idx + 1];
    ai =  parameters[4 * neuron_idx + 2];
    bi =  parameters[4 * neuron_idx + 3];

    ei = std::pow(E, bi - wxi * x - wti * t);
    ebp= std::pow(E, bi + wxi * x + wti * t);
    eb = std::pow(E, bi);
    etx = std::pow(E, wxi * x + wti * t);
    ei1 = eb + etx;

    return (ai * wxi * wxi * eb * ebp) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ebp * (etx - eb)) / (ei1 * ei1 * ei1) + (3 * ai * wxi * wxi * eb * ebp * (etx - eb)) / (ei1 * ei1 * ei1 * ei1);
}

void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){

    if(val > prev_val){
        gamma *= 0.99999;
    }

    if(sk <= 1e-3 || grad_norm < grad_norm_prev){
        /* movement on a line */
        /* new slope is less steep, speed up */
        gamma *= 1.0005;
    }
    else if(grad_norm > grad_norm_prev){
        /* new slope is more steep, slow down*/
        gamma /= 1.0005;
    }
    else{
        gamma /= 1.005;
    }
//        gamma *= 0.999999;
}

void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){

    if( fi > fim )
    {
        c /= 1.0000005;
    }
    else if( fi < fim )
    {
        c *= 1.0000005;
    }

    gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );


}

double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
    size_t i, j, k;
    double x, t, mem, derror, total_error, approx;

    size_t train_size = data_points.size();

    /* error of boundary condition: y(0, t) = sin(t) => e1 = 1/n * (sin(t) - y(0, t))^2 */
    for(i = 0; i < train_size; ++i){

        t = data_points[i];
        mem = (std::sin(t) - eval_approx_y(0.0, t, n_inner_neurons, *parameters));
        derror = 2.0 * mem / train_size;

        for(j = 0; j < n_inner_neurons; ++j){
            (*gradient)[4 * j + 0] -= derror * eval_approx_dwx_y(0, t, j, *parameters);
            (*gradient)[4 * j + 1] -= derror * eval_approx_dwt_y(0, t, j, *parameters);
            (*gradient)[4 * j + 2] -= derror *  eval_approx_da_y(0, t, j, *parameters);
            (*gradient)[4 * j + 3] -= derror *  eval_approx_db_y(0, t, j, *parameters);
        }

        total_error += mem * mem / train_size;
    }


    /* error boundary condition: y(x, 0) = e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) => e2 = 1/n * (e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) - y(x, 0))^2 */
    for(i = 0; i < train_size; ++i){

        x = data_points[i];
        mem = (std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x ) - eval_approx_y(x, 0.0, n_inner_neurons, *parameters));
        derror = 2.0 * mem / train_size;

        for(j = 0; j < n_inner_neurons; ++j){
            (*gradient)[4 * j + 0] -= derror * eval_approx_dwx_y(x, 0, j, *parameters);
            (*gradient)[4 * j + 1] -= derror * eval_approx_dwt_y(x, 0, j, *parameters);
            (*gradient)[4 * j + 2] -= derror *  eval_approx_da_y(x, 0, j, *parameters);
            (*gradient)[4 * j + 3] -= derror *  eval_approx_db_y(x, 0, j, *parameters);
        }

        total_error += mem * mem / train_size;
    }

    /* error of the governing equation: y_xx - y_t = 0 => e3 = 1/n^2 * (0 - y_xx + y_t)^2 */
    for(i = 0; i < data_points.size(); ++i){
        x = data_points[i];
        for(j = 0; j < data_points.size(); ++j){
            t = data_points[j];

            approx = eval_approx_yxx(x, t, n_inner_neurons, *parameters) - eval_approx_yt(x, t, n_inner_neurons, *parameters);
            mem = 0.0 - approx;
            derror = 2.0 * mem / (train_size * train_size);
            for(k = 0; k < n_inner_neurons; ++k){
                (*gradient)[4 * k + 0] -= derror * (eval_approx_dwx_yxx(x, t, k, *parameters) - eval_approx_dwx_yt(x, t, k, *parameters));
                (*gradient)[4 * k + 1] -= derror * (eval_approx_dwt_yxx(x, t, k, *parameters) - eval_approx_dwt_yt(x, t, k, *parameters));
                (*gradient)[4 * k + 2] -= derror * ( eval_approx_da_yxx(x, t, k, *parameters) -  eval_approx_da_yt(x, t, k, *parameters));
                (*gradient)[4 * k + 3] -= derror * ( eval_approx_db_yxx(x, t, k, *parameters) -  eval_approx_db_yt(x, t, k, *parameters));
            }
            total_error += mem * mem / (train_size * train_size);
        }
    }
    return total_error;
}

void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double ds, double de, size_t n_test_points, double ts, double te){
    std::cout << "Finding a solution via a Gradient Descent method with adaptive step-length" << std::endl;
    std::cout << "********************************************************************************************************************************************" <<std::endl;
    /* SETUP OF THE TRAINING DATA */
    std::vector<double> inp, out;

    double frac, alpha, x;
    double grad_norm = accuracy * 10.0, mem, ai, bi, wxi, wti, error, derror, approx, t, gamma, total_error, sk, sy, sx, sg, beta;
    double grad_norm_prev = grad_norm;
    size_t i, j, k, iter_idx = 0;

    /* TRAIN DATA FOR THE GOVERNING DE */
    std::vector<double> data_points(train_size);

    /* ISOTROPIC TRAIN SET */
    frac = (de - ds) / (train_size - 1);
    for(i = 0; i < train_size; ++i){
        data_points[i] = frac * i + ds;
//        std::cout << data_points[i] << std::endl;
    }

//    /* CHEBYSCHEV TRAIN SET */
//    alpha = PI / (train_size );
//    frac = 0.5 * (de - ds);
//    for(i = 0; i < train_size; ++i){
//        x = (std::cos(PI - alpha * i) + 1.0) * frac + ds;
//        data_points[i] = x;
//    }

    std::vector<double> *gradient_current = new std::vector<double>(4 * n_inner_neurons);
    std::vector<double> *gradient_prev = new std::vector<double>(4 * n_inner_neurons);
    std::vector<double> *params_current = new std::vector<double>(guess);
    std::vector<double> *params_prev = new std::vector<double>(guess);

    std::vector<double> *ptr_mem;

    std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);

//    for (i = 0; i < n_inner_neurons; ++i) {
//        wxi = (*params_current)[4 * i + 0];
//        wti = (*params_current)[4 * i + 1];
//        ai  = (*params_current)[4 * i + 2];
//        bi  = (*params_current)[4 * i + 3];
//
//        printf("Path %3d. wx = %15.8f, wt = %15.8f, b = %15.8f, a = %15.8f\n", (int)(i + 1), wxi, wti, bi, ai);
//    }

    gamma = 1.0;
    double val = 0.0, prev_val, c = 2.0;
    prev_val = 0.0;
    while( grad_norm > accuracy) {
        iter_idx++;
        prev_val = val;
        grad_norm_prev = grad_norm;

        /* reset of the current gradient */
        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
        val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current );

        grad_norm = 0.0;
        for(auto v: *gradient_current){
            grad_norm += v * v;
        }
        grad_norm = std::sqrt(grad_norm);

        /* Update of the parameters */
        /* step length calculation */
        if(iter_idx < 10 || iter_idx % 100 == 0){
            /* fixed step length */
            gamma = 0.1 * accuracy;
        }
        else{


            /* norm of the gradient calculation */

            sk = 0.0;
            for(i = 0; i < gradient_current->size(); ++i){
                sx = (*gradient_current)[i] - (*gradient_prev)[i];
                sk += sx * sx;
            }
            sk = std::sqrt(sk);

            /* angle between two consecutive gradients */
            double beta = 0.0, sx = 0.0;
            for(i = 0; i < gradient_current->size(); ++i){
                sx += (gradient_current->at( i ) * gradient_prev->at( i ));
            }
            sx /= grad_norm * grad_norm_prev;
            beta = std::sqrt(std::acos( sx ) / PI);


//            eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev );
            eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
        }


        for(i = 0; i < gradient_current->size(); ++i){
            (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
        }

        /* switcheroo */
        ptr_mem = gradient_prev;
        gradient_prev = gradient_current;
        gradient_current = ptr_mem;

        ptr_mem = params_prev;
        params_prev = params_current;
        params_current = ptr_mem;


        if(iter_idx % 1 == 0){
            printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val);
            std::cout.flush();
        }
    }
    printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val);
    std::cout.flush();

    for (i = 0; i < n_inner_neurons; ++i) {
        wxi = (*params_current)[4 * i + 0];
        wti = (*params_current)[4 * i + 1];
        ai  = (*params_current)[4 * i + 2];
        bi  = (*params_current)[4 * i + 3];

        printf("Path %3d. wx%d = %15.8f, wt%d = %15.8f, b%d = %15.8f, a%d = %15.8f\n", (int)(i + 1), (int)(i + 1), wxi , (int)(i + 1), wti, (int)(i + 1), bi, (int)(i + 1), ai);
    }
    std::cout << "********************************************************************************************************************************************" <<std::endl;

//    for (i = 0; i < n_inner_neurons; ++i) {
//        wxi = (*params_current)[4 * i + 0];
//        wti = (*params_current)[4 * i + 1];
//        ai  = (*params_current)[4 * i + 2];
//        bi  = (*params_current)[4 * i + 3];
//
//        printf("%f/(1+e^(%f-%fx-%ft)) + ", (int)(i + 1), ai, bi, wxi, wti);
//    }
//    printf("\n");


//    /* SOLUTION EXPORT */
//    printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\r", 0.0);
//    std::cout.flush();
//
//    std::vector<double> input, output(1);
//    std::ofstream ofs_y("data_2d_pde1_y.txt", std::ofstream::out);
//    frac = (te - ts) / (n_test_points - 1);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs_y << x << " " << t << " " << eval_approx_y(x, t, n_inner_neurons, *params_current) << std::endl;
//            printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs_y.close();
//
//    printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\r", 0.0);
//    std::cout.flush();
//    std::ofstream ofs_t("data_2d_pde1_yt.txt", std::ofstream::out);
//    frac = (te - ts) / (n_test_points - 1);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs_t << x << " " << t << " " << eval_approx_yt(x, t, n_inner_neurons, *params_current) << std::endl;
//            printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs_t.close();
//
//    printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\r", 0.0);
//    std::cout.flush();
//    std::ofstream ofs_x("data_2d_pde1_yx.txt", std::ofstream::out);
//    frac = (te - ts) / (n_test_points - 1);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs_x << x << " " << t << " " << eval_approx_yx(x, t, n_inner_neurons, *params_current) << std::endl;
//            printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs_x.close();
//
//    printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\r", 0.0);
//    std::cout.flush();
//    std::ofstream ofs_xx("data_2d_pde1_yxx.txt", std::ofstream::out);
//    frac = (te - ts) / (n_test_points - 1);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs_xx << x << " " << t << " " << eval_approx_yxx(x, t, n_inner_neurons, *params_current) << std::endl;
//            printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs_xx.close();
//
//    /* governing equation error */
//    std::ofstream ofs_error("data_2d_pde1_first_equation_error.txt", std::ofstream::out);
//    printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", 0.0);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs_error << x << " " << t << " " << std::fabs(eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current)) << std::endl;
//            printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs_error.close();
//
//    /* ISOTROPIC TEST SET FOR BOUNDARY CONDITIONS */
//    /* first boundary condition & its error */
//    std::ofstream ofs_bc_t("data_1d_pde1_yt.txt", std::ofstream::out);
//    std::ofstream ofs_bc_x("data_1d_pde1_yx.txt", std::ofstream::out);
//    printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 0.0);
//    for(i = 0; i < n_test_points; ++i){
//        x = frac * i + ts;
//        t = frac * i + ts;
//
//        double yt = std::sin(t);
//        double yx = std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x );
//
//        double evalt = eval_approx_y(0, t, n_inner_neurons, *params_current);
//        double evalx = eval_approx_y(x, 0, n_inner_neurons, *params_current);
//
//        ofs_bc_t << i + 1 << " " << t << " " << yt << " " << evalt << " " << std::fabs(evalt - yt) << std::endl;
//        ofs_bc_x << i + 1 << " " << x << " " << yx << " " << evalx << " " << std::fabs(evalx - yx) << std::endl;
//
//        printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", (100.0 * i) / (n_test_points - 1));
//        std::cout.flush();
//    }
//    printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 100.0);
//    std::cout.flush();
//    ofs_bc_t.close();
//    ofs_bc_x.close();

    delete gradient_current;
    delete gradient_prev;
    delete params_current;
    delete params_prev;
}

void solve_example_particle_swarm(double accuracy, size_t n_inner_neurons, size_t train_size, double ds, double de, size_t n_test_points, double ts, double te, size_t max_iters, size_t n_particles){
    std::cout << "Finding a solution via the Particle Swarm Optimization" << std::endl;
    std::cout << "********************************************************************************************************************************************" <<std::endl;

    /* solution properties */

    /* do not change below */
    size_t n_inputs = 2;
    size_t n_equations = 3;
    DESolver solver_01( n_equations, n_inputs, n_inner_neurons );

    /* SETUP OF THE EQUATIONS */
    MultiIndex alpha_00( n_inputs );
    MultiIndex alpha_01( n_inputs );
    MultiIndex alpha_20( n_inputs );

    alpha_00.set_partial_derivative(0, 0);
    alpha_00.set_partial_derivative(1, 0);

    alpha_01.set_partial_derivative(1, 1);

    alpha_20.set_partial_derivative(0, 2);

    /* the governing differential equation */
    solver_01.add_to_differential_equation( 0, alpha_20,  1.0 );
    solver_01.add_to_differential_equation( 0, alpha_01, -1.0 );

    /* dirichlet boundary condition */
    solver_01.add_to_differential_equation( 1, alpha_00, 1.0 );
    solver_01.add_to_differential_equation( 2, alpha_00, 1.0 );


    /* SETUP OF THE TRAINING DATA */
    //TODO neater data setup
    std::vector<double> inp, out;

    double frac, x, t;

    /* TRAIN DATA FOR THE GOVERNING DE */
    std::vector<double> test_bounds_2d = {ds, de, ds, de};

    /* GOVERNING EQUATION RHS */
    auto f1 = [](std::vector<double>&input) -> std::vector<double> {
        std::vector<double> output(1);
        output[0] = 0.0;
        return output;
    };
    DataSet ds_00(test_bounds_2d, train_size, f1, 1);

    std::vector<std::pair<std::vector<double>, std::vector<double>>> data_vec_t;
    std::vector<std::pair<std::vector<double>, std::vector<double>>> data_vec_x;
    /* ISOTROPIC TRAIN SET */
    frac = (de - ds) / (train_size - 1);
    for(unsigned int i = 0; i < train_size; ++i){
        inp = {0.0, frac * i};
        out = {std::sin(inp[1])};
        data_vec_t.emplace_back(std::make_pair(inp, out));

        inp = {frac * i, 0.0};
        out = {std::pow(E, -0.707106781 * inp[0]) * std::sin( -0.707106781 * inp[0] )};
        data_vec_x.emplace_back(std::make_pair(inp, out));

    }
    DataSet ds_t(&data_vec_t);
    DataSet ds_x(&data_vec_x);


    /* Placing the conditions into the solver */
    solver_01.set_error_function( 0, ErrorFunctionType::ErrorFuncMSE, &ds_00 );
    solver_01.set_error_function( 1, ErrorFunctionType::ErrorFuncMSE, &ds_t );
    solver_01.set_error_function( 2, ErrorFunctionType::ErrorFuncMSE, &ds_x );


    /* PARTICLE SWARM TRAINING METHOD SETUP */


    //must encapsulate each of the partial error functions
    std::vector<double> domain_bounds(8 * n_inner_neurons);
    for(unsigned int i = 0; i < 4 * n_inner_neurons; ++i){
        domain_bounds[2 * i] = -10.0;
        domain_bounds[2 * i + 1] = 10.0;
    }

    double c1 = 1.7, c2 = 1.7, w = 0.7;
    double gamma = 0.5, epsilon = 0.02, delta = 0.9;

    solver_01.solve_via_particle_swarm( &domain_bounds, c1, c2, w, n_particles, max_iters, gamma, epsilon, delta );
    size_t i, j;
    std::vector<double> *w1_ptr = solver_01.get_solution( alpha_00 )->get_parameter_ptr_weights();
    std::vector<double> *w2_ptr = solver_01.get_solution( alpha_00 )->get_parameter_ptr_biases();
    std::vector<double> export_params(4 * n_inner_neurons);
    for(size_t i = 0; i < n_inner_neurons; ++i){
        export_params[4 * i + 0] = w1_ptr->at(i);
        export_params[4 * i + 1] = w1_ptr->at(n_inner_neurons + i);
        export_params[4 * i + 2] = w1_ptr->at(2 * n_inner_neurons + i);
        export_params[4 * i + 3] = w2_ptr->at( i );

        printf("Path %3d. wx%d = %15.8f, wt%d = %15.8f, b%d = %15.8f, a%d = %15.8f\n", (int)(i + 1), (int)(i + 1), export_params[4 * i + 0] , (int)(i + 1), export_params[4 * i + 1], (int)(i + 1), export_params[4 * i + 3], (int)(i + 1), export_params[4 * i + 2]);
    }
    std::cout << "********************************************************************************************************************************************" << std::endl;
    /* PRACTICAL END OF THE EXAMPLE */

//    /* SOLUTION EXPORT */
//    for(i = 0; i < n_inner_neurons; ++i){
//        export_params[4 * i + 0] = w1_ptr->at(i);
//        export_params[4 * i + 1] = w1_ptr->at(n_inner_neurons + i);
//        export_params[4 * i + 2] = w1_ptr->at(2 * n_inner_neurons + i);
//        export_params[4 * i + 3] = w2_ptr->at( i );
//    }
//
//    printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\r", 0.0);
//    std::cout.flush();
//
//    std::vector<double> input, output(1);
//    std::ofstream ofs("data_2d_pde1_y.txt", std::ofstream::out);
//    frac = (te - ts) / (n_test_points - 1);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs << x << " " << t << " " << eval_approx_y(x, t, n_inner_neurons, export_params) << std::endl;
//            printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs.close();
//
//    /* governing equation error */
//    ofs = std::ofstream("data_2d_pde1_first_equation_error.txt", std::ofstream::out);
//    printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", 0.0);
//    for(i = 0; i < n_test_points; ++i){
//        x = i * frac + ts;
//        for(j = 0; j < n_test_points; ++j){
//            t = j * frac + ts;
//            ofs << x << " " << t << " " << std::fabs(eval_approx_yxx(x, t, n_inner_neurons, export_params) - eval_approx_yt(x, t, n_inner_neurons, export_params)) << std::endl;
//            printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
//            std::cout.flush();
//        }
//    }
//    printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\n", 100.0);
//    std::cout.flush();
//    ofs.close();
//
//    /* ISOTROPIC TEST SET FOR BOUNDARY CONDITIONS */
//    /* first boundary condition & its error */
//    ofs = std::ofstream("data_1d_pde1_yt.txt", std::ofstream::out);
//    std::ofstream ofs2("data_1d_pde1_yx.txt", std::ofstream::out);
//    printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 0.0);
//    for(i = 0; i < n_test_points; ++i){
//        x = frac * i + ts;
//        t = frac * i + ts;
//
//        double yt = std::sin(t);
//        double yx = std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x );
//
//        double evalt = eval_approx_y(0, t, n_inner_neurons, export_params);
//        double evalx = eval_approx_y(x, 0, n_inner_neurons, export_params);
//
//        ofs << i + 1 << " " << t << " " << yt << " " << evalt << " " << std::fabs(evalt - yt) << std::endl;
//        ofs2 << i + 1 << " " << x << " " << yx << " " << evalx << " " << std::fabs(evalx - yx) << std::endl;
//
//        printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", (100.0 * i) / (n_test_points - 1));
//        std::cout.flush();
//    }
//    printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 100.0);
//    std::cout.flush();
//    ofs2.close();
//    ofs.close();
}

int main() {
    std::cout << "Running lib4neuro Partial Differential Equation example   1" << std::endl;
    std::cout << "********************************************************************************************************************************************" <<std::endl;
    std::cout << "          Governing equation: y_xx - y_t = 0,                                   for (x, t) in [0, 1] x [0, 1]" << std::endl;
    std::cout << "Dirichlet boundary condition:    y(0, t) = sin(t),                              for t in [0, 1]" << std::endl;
    std::cout << "Dirichlet boundary condition:    y(x, 0) = exp(-sqrt(0.5)x) * sin(-sqrt(0.5)x), for x in [0, 1]" << std::endl;
    std::cout << "********************************************************************************************************************************************" <<std::endl;
    std::cout << "Expressing solution as y(x, t) = sum over [a_i / (1 + exp(bi - wxi*x - wti*t))], i in [1, n], where n is the number of hidden neurons" <<std::endl;
    std::cout << "********************************************************************************************************************************************" <<std::endl;

    unsigned int n_inner_neurons = 4;
    unsigned int train_size = 10;
    double accuracy = 1e-3;
    double ds = 0.0;
    double de = 1.0;

    unsigned int test_size = 100;
    double ts = ds;
    double te = de + 0;

    size_t particle_swarm_max_iters = 500;
    size_t n_particles = 10;
    solve_example_particle_swarm(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);


    std::vector<double> init_guess(4 * n_inner_neurons);
    std::random_device seeder;
    std::mt19937 gen(seeder());
    std::uniform_real_distribution<double> dist(-1.0, 1.0);
    for(unsigned int i = 0; i < init_guess.size(); ++i){
        init_guess[i] = dist(gen);
    }

//    init_guess = {-0.21709230, -0.26189447, 0.77853923, 0.41091127, -0.44311897, -0.99036349, 0.84912023, -0.16920743};
//    solve_example_gradient(init_guess, accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te);

    return 0;
}