From 312a43ca2a5e30db1cd9f1484d03c49cf820eb06 Mon Sep 17 00:00:00 2001
From: Michal Kravcenko <michal.kravcenko@vsb.cz>
Date: Tue, 14 Aug 2018 12:20:41 +0200
Subject: [PATCH] MOD: modified differential equation examples

---
 src/examples/net_test_ode_1.cpp | 327 +++++++++++----------------
 src/examples/net_test_pde_1.cpp | 380 ++++++++++++++++++++------------
 2 files changed, 367 insertions(+), 340 deletions(-)

diff --git a/src/examples/net_test_ode_1.cpp b/src/examples/net_test_ode_1.cpp
index 4d1e309f..c7b072d9 100644
--- a/src/examples/net_test_ode_1.cpp
+++ b/src/examples/net_test_ode_1.cpp
@@ -232,7 +232,90 @@ double eval_error_function(std::vector<double> &parameters, size_t n_inner_neuro
     return output;
 }
 
-void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, bool opti_w, bool opti_b, double d1_s, double d1_e,
+void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
+
+    if(val > prev_val){
+        gamma *= 0.99999;
+    }
+
+    if(sk <= 1e-3 || grad_norm < grad_norm_prev){
+        /* movement on a line */
+        /* new slope is less steep, speed up */
+        gamma *= 1.0005;
+    }
+    else if(grad_norm > grad_norm_prev){
+        /* new slope is more steep, slow down*/
+        gamma /= 1.0005;
+    }
+    else{
+        gamma /= 1.005;
+    }
+//        gamma *= 0.999999;
+}
+
+void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
+
+    if( fi > fim )
+    {
+        c /= 1.0000005;
+    }
+    else if( fi < fim )
+    {
+        c *= 1.0000005;
+    }
+
+    gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
+
+
+}
+
+
+double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
+    size_t i, j;
+    double x,  mem, derror, total_error, approx;
+
+    size_t train_size = data_points.size();
+
+    /* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
+    x = 0.0;
+    mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters));
+    derror = 2.0 * mem;
+    total_error = mem * mem;
+    for(i = 0; i < n_inner_neurons; ++i){
+        (*gradient)[3 * i] -= derror * eval_approx_dw_f(x, i, *parameters);
+        (*gradient)[3 * i + 1] -= derror * eval_approx_da_f(x, i, *parameters);
+        (*gradient)[3 * i + 2] -= derror * eval_approx_db_f(x, i, *parameters);
+    }
+
+    /* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
+    mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters));
+    derror = 2.0 * mem;
+    total_error += mem * mem;
+    for(i = 0; i < n_inner_neurons; ++i){
+        (*gradient)[3 * i] -= derror * eval_approx_dw_df(x, i, *parameters);
+        (*gradient)[3 * i + 1] -= derror * eval_approx_da_df(x, i, *parameters);
+        (*gradient)[3 * i + 2] -= derror * eval_approx_db_df(x, i, *parameters);
+    }
+
+    for(j = 0; j < data_points.size(); ++j){
+        x = data_points[j];
+        /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
+        approx= eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters);
+        mem = 0.0 - approx;
+        derror = 2.0 * mem / train_size;
+        for(i = 0; i < n_inner_neurons; ++i){
+            (*gradient)[3 * i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters));
+            (*gradient)[3 * i + 1] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters));
+            (*gradient)[3 * i + 2] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters));
+        }
+        total_error += mem * mem / train_size;
+    }
+
+    return total_error;
+}
+
+
+void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double d1_s, double d1_e,
                                 size_t test_size, double ts, double te) {
 
     /* SETUP OF THE TRAINING DATA */
@@ -288,171 +371,60 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
     }
 
     gamma = 1.0;
+    double prev_val, val = 0.0, c = 2.0;
     while( grad_norm > accuracy) {
         iter_idx++;
+        prev_val = val;
+        grad_norm_prev = grad_norm;
 
-        /* current gradient */
+        /* reset of the current gradient */
         std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
+        val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current );
 
-        /* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
-        xj = 0.0;
-        mem = (1.0 - eval_approx_f(xj, n_inner_neurons, *params_current));
-        derror = 2.0 * mem;
-        total_error = mem * mem;
-        for(i = 0; i < n_inner_neurons; ++i){
-            if(opti_w){
-                (*gradient_current)[3 * i] -= derror * eval_approx_dw_f(xj, i, *params_current);
-                (*gradient_current)[3 * i + 1] -= derror * eval_approx_da_f(xj, i, *params_current);
-            }
-            if(opti_b){
-                (*gradient_current)[3 * i + 2] -= derror * eval_approx_db_f(xj, i, *params_current);
-            }
-        }
-//        for(auto e: *gradient_current){
-//            printf("[%10.8f]", e);
-//        }
-//        printf("\n");
-
-        /* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
-        mem = (1.0 - eval_approx_df(xj, n_inner_neurons, *params_current));
-        derror = 2.0 * mem;
-        total_error += mem * mem;
-        for(i = 0; i < n_inner_neurons; ++i){
-            if(opti_w){
-                (*gradient_current)[3 * i] -= derror * eval_approx_dw_df(xj, i, *params_current);
-                (*gradient_current)[3 * i + 1] -= derror * eval_approx_da_df(xj, i, *params_current);
-            }
-            if(opti_b){
-                (*gradient_current)[3 * i + 2] -= derror * eval_approx_db_df(xj, i, *params_current);
-            }
-        }
-//        for(auto e: *gradient_current){
-//            printf("[%10.8f]", e);
-//        }
-//        printf("\n");
-
-        for(j = 0; j < data_points.size(); ++j){
-            xj = data_points[j];
-//            xj = ds.get_data()->at(j).first[0];
-            /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
-            approx= eval_approx_ddf(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current);
-            mem = 0.0 - approx;
-            error = 2.0 * mem / train_size;
-            for(i = 0; i < n_inner_neurons; ++i){
-                if(opti_w){
-                    (*gradient_current)[3 * i] -= error * (eval_approx_dw_ddf(xj, i, *params_current) + 4.0 * eval_approx_dw_df(xj, i, *params_current) + 4.0 * eval_approx_dw_f(xj, i, *params_current));
-                    (*gradient_current)[3 * i + 1] -= error * (eval_approx_da_ddf(xj, i, *params_current) + 4.0 * eval_approx_da_df(xj, i, *params_current) + 4.0 * eval_approx_da_f(xj, i, *params_current));
-                }
-                if(opti_b){
-                    (*gradient_current)[3 * i + 2] -= error * (eval_approx_db_ddf(xj, i, *params_current) + 4.0 * eval_approx_db_df(xj, i, *params_current) + 4.0 * eval_approx_db_f(xj, i, *params_current));
-                }
-            }
-            total_error += mem * mem / train_size;
+        grad_norm = 0.0;
+        for(auto v: *gradient_current){
+            grad_norm += v * v;
         }
-
-
-//        for(auto e: *gradient_current){
-//            printf("[%10.8f]", e);
-//        }
-//        printf("\n");
-
-        /* conjugate direction coefficient (Polak-Ribiere): <-grad_curr, -grad_curr + grad_prev> / <-grad_prev, -grad_prev >*/
+        grad_norm = std::sqrt(grad_norm);
 
         /* Update of the parameters */
-
-        if(iter_idx < 10 || iter_idx % 1 == 0){
-            for(i = 0; i < conjugate_direction_current->size(); ++i){
-                (*conjugate_direction_current)[i] = - (*gradient_current)[i];
-            }
-        }
-        else{
-            /* conjugate gradient */
-            sk = sy = 0.0;
-            for(i = 0; i < conjugate_direction_current->size(); ++i){
-                sk += (*gradient_current)[i] * ((*gradient_current)[i] - (*gradient_prev)[i]);
-                sy += (*gradient_prev)[i] * (*gradient_prev)[i];
-            }
-            beta = std::max(0.0, sk / sy);
-
-            /* update of the conjugate direction */
-            for(i = 0; i < conjugate_direction_current->size(); ++i){
-                (*conjugate_direction_current)[i] = beta * (*conjugate_direction_prev)[i] - (*gradient_current)[i];
-            }
-        }
-
         /* step length calculation */
-        if(iter_idx < 10){
+        if(iter_idx < 10 || iter_idx % 100 == 0){
             /* fixed step length */
-            gamma = 0.000001;
+            gamma = 0.1 * accuracy;
         }
         else{
-//            /* Barzilai-Borwein */
-//            sk = sy = 0.0;
-//
-//            for(i = 0; i < gradient_current->size(); ++i){
-//                sx = (*params_current)[i] - (*params_prev)[i];
-//                sg = (*conjugate_direction_current)[i] - (*conjugate_direction_prev)[i];
-//
-//                sk += sx * sg;
-//                sy += sg * sg;
-//            }
-//
-//            gamma = -sk / sy;
 
+            /* norm of the gradient calculation */
 
+            sk = 0.0;
+            for(i = 0; i < gradient_current->size(); ++i){
+                sx = (*gradient_current)[i] - (*gradient_prev)[i];
+                sk += sx * sx;
+            }
+            sk = std::sqrt(sk);
 
+            /* angle between two consecutive gradients */
+            double beta = 0.0, sx = 0.0;
+            for(i = 0; i < gradient_current->size(); ++i){
+                sx += (gradient_current->at( i ) * gradient_prev->at( i ));
+            }
+            sx /= grad_norm * grad_norm_prev;
+            beta = std::sqrt(std::acos( sx ) / PI);
 
-//            /* Line search */
-//
-//            gamma = line_search(10.0, conjugate_direction, *params_current, *gradient_current, n_inner_neurons, ds_00);
+
+//            eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev );
+            eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
         }
 
 
-//        for(auto e: conjugate_direction){
-//            printf("[%10.8f]", e);
-//        }
-//        printf("\n");
-//
-//        for(auto e: *params_current){
-//            printf("[%10.8f]", e);
-//        }
-//        printf("\n");
 
-        /* norm of the gradient calculation */
-        grad_norm_prev = grad_norm;
-        /* adaptive step-length */
-        sk = 0.0;
-        for(i = 0; i < gradient_current->size(); ++i){
-            sx = (*gradient_current)[i] - (*gradient_prev)[i];
-            sk += sx * sx;
-        }
-        sk = std::sqrt(sk);
 
-        if(sk <= 1e-3 || grad_norm < grad_norm_prev){
-            /* movement on a line */
-            /* new slope is less steep, speed up */
-            gamma *= 1.0005;
-        }
-        else if(grad_norm > grad_norm_prev){
-            /* new slope is more steep, slow down*/
-            gamma /= 1.0005;
-        }
-        else{
-            gamma /= 1.005;
-        }
-        grad_norm = 0.0;
-        for(auto v: *gradient_current){
-            grad_norm += v * v;
-        }
-        grad_norm = std::sqrt(grad_norm);
 
 
         for(i = 0; i < gradient_current->size(); ++i){
-//            (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
-            (*params_prev)[i] = (*params_current)[i] + gamma * (*conjugate_direction_current)[i];
+            (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
         }
-//        printf("\n");
-
 
         /* switcheroo */
         ptr_mem = gradient_prev;
@@ -463,34 +435,13 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
         params_prev = params_current;
         params_current = ptr_mem;
 
-        ptr_mem = conjugate_direction_prev;
-        conjugate_direction_prev = conjugate_direction_current;
-        conjugate_direction_current = ptr_mem;
 
-
-
-//        for (i = 0; i < n_inner_neurons; ++i) {
-//            wi = (*params_current)[3 * i];
-//            ai = (*params_current)[3 * i + 1];
-//            bi = (*params_current)[3 * i + 2];
-//
-//            printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai);
-//        }
-
-        if(iter_idx % 100 == 0){
-            printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f (%10.8f)\r", (int)iter_idx, gamma, grad_norm, total_error, eval_error_function(*params_prev, n_inner_neurons, data_points));
-//            for (i = 0; i < n_inner_neurons; ++i) {
-//                wi = (*params_current)[3 * i];
-//                ai = (*params_current)[3 * i + 1];
-//                bi = (*params_current)[3 * i + 2];
-//
-//                printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai);
-//            }
-//            std::cout << "-----------------------------" << std::endl;
+        if(iter_idx % 1 == 0){
+            printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val);
             std::cout.flush();
         }
     }
-    printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n",(int) iter_idx, gamma, grad_norm, eval_error_function(*params_current, n_inner_neurons, data_points));
+    printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val);
     std::cout.flush();
 
     for (i = 0; i < n_inner_neurons; ++i) {
@@ -719,35 +670,25 @@ int main() {
     double ts = ds;
     double te = de + 2;
 
-    size_t particle_swarm_max_iters = 1000;
-    size_t n_particles = 100;
-    test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);
+//    size_t particle_swarm_max_iters = 1000;
+//    size_t n_particles = 100;
+//    test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);
 
-//    bool optimize_weights = true;
-//    bool optimize_biases = true;
-//
-////    std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
-//    std::vector<double> init_guess(3 * n_inner_neurons);
-//
-//    std::random_device seeder;
-//    std::mt19937 gen(seeder());
-//    std::uniform_real_distribution<double> dist(-10.0, 10.0);
-//    for(unsigned int i = 0; i < init_guess.size(); ++i){
-//        init_guess[i] = dist(gen);
-//    }
-//    if(!optimize_biases){
-//        for(unsigned int i = 0; i < n_inner_neurons; ++i){
-//            init_guess[3 * i + 2] = 0.0;
-//        }
-//    }
-//    if(!optimize_weights){
-//        for(unsigned int i = 0; i < n_inner_neurons; ++i){
-//            init_guess[3 * i] = 0.0;
-//            init_guess[3 * i + 1] = 0.0;
-//        }
-//    }
-//
-//    test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, optimize_weights, optimize_biases, ds, de, test_size, ts, te);
+    bool optimize_weights = true;
+    bool optimize_biases = true;
+
+//    std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
+    std::vector<double> init_guess(3 * n_inner_neurons);
+
+    std::random_device seeder;
+    std::mt19937 gen(seeder());
+    std::uniform_real_distribution<double> dist(-1.0, 1.0);
+    for(unsigned int i = 0; i < init_guess.size(); ++i){
+        init_guess[i] = dist(gen);
+    }
+
+
+    test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te);
 
 
     return 0;
diff --git a/src/examples/net_test_pde_1.cpp b/src/examples/net_test_pde_1.cpp
index e85304fc..534369ae 100644
--- a/src/examples/net_test_pde_1.cpp
+++ b/src/examples/net_test_pde_1.cpp
@@ -42,6 +42,59 @@ double eval_approx_y(double x, double t, size_t n_inner_neurons, std::vector<dou
     }
     return value;
 }
+//yt(x, t) = ... (ai * wti * e^(bi - wti * t - wxi * x))/(e^(bi - wti * t - wxi * x) + 1)^2
+double eval_approx_yt(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
+    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
+
+    for(size_t i = 0; i < n_inner_neurons; ++i){
+
+        wxi = parameters[4 * i + 0];
+        wti = parameters[4 * i + 1];
+        ai  = parameters[4 * i + 2];
+        bi  = parameters[4 * i + 3];
+
+        ei = std::pow(E, bi - wxi * x - wti * t);
+        ei1 = ei + 1.0;
+
+        value += ai * wti * ei / (ei1 * ei1);
+    }
+    return value;
+}
+//yx(x, t) = ...  (ai * wxi * e^(bi - t * wti - wxi * x))/(e^(bi - t * wti - wxi * x) + 1)^2
+double eval_approx_yx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
+    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
+
+    for(size_t i = 0; i < n_inner_neurons; ++i){
+
+        wxi = parameters[4 * i + 0];
+        wti = parameters[4 * i + 1];
+        ai  = parameters[4 * i + 2];
+        bi  = parameters[4 * i + 3];
+
+        ei = std::pow(E, bi - wxi * x - wti * t);
+        ei1 = ei + 1.0;
+
+        value += (ai * wxi * ei1)/(ei1 * ei1);
+    }
+    return value;
+}
+//yxx(x, t) = ...  (ai * wxi * e^(bi - t * wti - wxi * x))/(e^(bi - t * wti - wxi * x) + 1)^2
+double eval_approx_yxx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
+    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
+    for(size_t i = 0; i < n_inner_neurons; ++i){
+
+        wxi = parameters[4 * i + 0];
+        wti = parameters[4 * i + 1];
+        ai  = parameters[4 * i + 2];
+        bi  = parameters[4 * i + 3];
+
+        ei = std::pow(E, bi - wxi * x - wti * t);
+        ei1 = ei + 1.0;
+
+        value += (2 * ai * wxi * wxi * ei * ei) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ei) / (ei1 * ei1);
+    }
+    return value;
+}
 
 double eval_approx_da_y(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
     double wxi, wti, bi, ei, ei1;
@@ -95,25 +148,6 @@ double eval_approx_db_y(double x, double t, size_t neuron_idx, std::vector<doubl
     return -(ai * ei)/(ei1 * ei1);
 }
 
-//yt(x, t) = ... (ai * wti * e^(bi - wti * t - wxi * x))/(e^(bi - wti * t - wxi * x) + 1)^2
-double eval_approx_yt(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
-    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
-
-    for(size_t i = 0; i < n_inner_neurons; ++i){
-
-        wxi = parameters[4 * i + 0];
-        wti = parameters[4 * i + 1];
-        ai  = parameters[4 * i + 2];
-        bi  = parameters[4 * i + 3];
-
-        ei = std::pow(E, bi - wxi * x - wti * t);
-        ei1 = ei + 1.0;
-
-        value += ai * wti * ei / (ei1 * ei1);
-    }
-    return value;
-}
-
 double eval_approx_da_yt(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
     double wxi, wti, bi, ei, ei1;
 
@@ -166,23 +200,6 @@ double eval_approx_db_yt(double x, double t, size_t neuron_idx, std::vector<doub
     return (ai * wti * ei) / (ei1 * ei1) - (2 * ai * wti * ei * ei) / (ei1 * ei1 * ei1);
 }
 
-double eval_approx_yxx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
-    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
-    for(size_t i = 0; i < n_inner_neurons; ++i){
-
-        wxi = parameters[4 * i + 0];
-        wti = parameters[4 * i + 1];
-        ai  = parameters[4 * i + 2];
-        bi  = parameters[4 * i + 3];
-
-        ei = std::pow(E, bi - wxi * x - wti * t);
-        ei1 = ei + 1.0;
-
-        value += (2 * ai * wxi * wxi * ei * ei) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ei) / (ei1 * ei1);
-    }
-    return value;
-}
-
 double eval_approx_da_yxx(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
     double wxi, wti, ai, bi, ei, ei1, ebp, eb, etx;
     wxi = parameters[4 * neuron_idx + 0];
@@ -247,7 +264,7 @@ double eval_approx_db_yxx(double x, double t, size_t neuron_idx, std::vector<dou
     return (ai * wxi * wxi * eb * ebp) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ebp * (etx - eb)) / (ei1 * ei1 * ei1) + (3 * ai * wxi * wxi * eb * ebp * (etx - eb)) / (ei1 * ei1 * ei1 * ei1);
 }
 
-double get_step_size_simple(double gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
+void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
 
     if(val > prev_val){
         gamma *= 0.99999;
@@ -266,15 +283,85 @@ double get_step_size_simple(double gamma, double val, double prev_val, double sk
         gamma /= 1.005;
     }
 //        gamma *= 0.999999;
+}
+
+void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
+
+    if( fi > fim )
+    {
+        c /= 1.0000005;
+    }
+    else if( fi < fim )
+    {
+        c *= 1.0000005;
+    }
 
+    gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
 
-//        gamma = 0.000001;
 
-    return gamma;
 }
 
-double get_step_size_mk(double gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
+double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
+    size_t i, j, k;
+    double x, t, mem, derror, total_error, approx;
+
+    size_t train_size = data_points.size();
+
+    /* error of boundary condition: y(0, t) = sin(t) => e1 = 1/n * (sin(t) - y(0, t))^2 */
+    for(i = 0; i < train_size; ++i){
+
+        t = data_points[i];
+        mem = (std::sin(t) - eval_approx_y(0.0, t, n_inner_neurons, *parameters));
+        derror = 2.0 * mem / train_size;
+
+        for(j = 0; j < n_inner_neurons; ++j){
+            (*gradient)[4 * j + 0] -= derror * eval_approx_dwx_y(0, t, j, *parameters);
+            (*gradient)[4 * j + 1] -= derror * eval_approx_dwt_y(0, t, j, *parameters);
+            (*gradient)[4 * j + 2] -= derror *  eval_approx_da_y(0, t, j, *parameters);
+            (*gradient)[4 * j + 3] -= derror *  eval_approx_db_y(0, t, j, *parameters);
+        }
+
+        total_error += mem * mem / train_size;
+    }
+
+
+
+    /* error boundary condition: y(x, 0) = e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) => e2 = 1/n * (e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) - y(x, 0))^2 */
+    for(i = 0; i < train_size; ++i){
+
+        x = data_points[i];
+        mem = (std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x ) - eval_approx_y(x, 0.0, n_inner_neurons, *parameters));
+        derror = 2.0 * mem / train_size;
+
+        for(j = 0; j < n_inner_neurons; ++j){
+            (*gradient)[4 * j + 0] -= derror * eval_approx_dwx_y(x, 0, j, *parameters);
+            (*gradient)[4 * j + 1] -= derror * eval_approx_dwt_y(x, 0, j, *parameters);
+            (*gradient)[4 * j + 2] -= derror *  eval_approx_da_y(x, 0, j, *parameters);
+            (*gradient)[4 * j + 3] -= derror *  eval_approx_db_y(x, 0, j, *parameters);
+        }
+
+        total_error += mem * mem / train_size;
+    }
 
+    /* error of the governing equation: y_xx - y_t = 0 => e3 = 1/n^2 * (0 - y_xx + y_t)^2 */
+    for(i = 0; i < data_points.size(); ++i){
+        x = data_points[i];
+        for(j = 0; j < data_points.size(); ++j){
+            t = data_points[j];
+
+            approx = eval_approx_yxx(x, t, n_inner_neurons, *parameters) - eval_approx_yt(x, t, n_inner_neurons, *parameters);
+            mem = 0.0 - approx;
+            derror = 2.0 * mem / (train_size * train_size);
+            for(k = 0; k < n_inner_neurons; ++k){
+                (*gradient)[4 * k + 0] -= derror * (eval_approx_dwx_yxx(x, t, k, *parameters) - eval_approx_dwx_yt(x, t, k, *parameters));
+                (*gradient)[4 * k + 1] -= derror * (eval_approx_dwt_yxx(x, t, k, *parameters) - eval_approx_dwt_yt(x, t, k, *parameters));
+                (*gradient)[4 * k + 2] -= derror * ( eval_approx_da_yxx(x, t, k, *parameters) -  eval_approx_da_yt(x, t, k, *parameters));
+                (*gradient)[4 * k + 3] -= derror * ( eval_approx_db_yxx(x, t, k, *parameters) -  eval_approx_db_yt(x, t, k, *parameters));
+            }
+            total_error += mem * mem / (train_size * train_size);
+        }
+    }
+    return total_error;
 }
 
 void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double ds, double de, size_t n_test_points, double ts, double te){
@@ -324,110 +411,58 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t
     }
 
     gamma = 1.0;
-    double val = 0.0, prev_val;
+    double val = 0.0, prev_val, c = 2.0;
     prev_val = 0.0;
     while( grad_norm > accuracy) {
         iter_idx++;
         prev_val = val;
-        total_error = 0.0;
+        grad_norm_prev = grad_norm;
 
         /* reset of the current gradient */
         std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
+        val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current );
 
-        /* error of boundary condition: y(0, t) = sin(t) => e1 = 1/n * (sin(t) - y(0, t))^2 */
-        for(i = 0; i < data_points.size(); ++i){
-
-            t = data_points[i];
-            mem = (std::sin(t) - eval_approx_y(0.0, t, n_inner_neurons, *params_current));
-            derror = 2.0 * mem / train_size;
-
-            for(j = 0; j < n_inner_neurons; ++j){
-                (*gradient_current)[4 * j + 0] -= derror * eval_approx_dwx_y(0, t, j, *params_current);
-                (*gradient_current)[4 * j + 1] -= derror * eval_approx_dwt_y(0, t, j, *params_current);
-                (*gradient_current)[4 * j + 2] -= derror *  eval_approx_da_y(0, t, j, *params_current);
-                (*gradient_current)[4 * j + 3] -= derror *  eval_approx_db_y(0, t, j, *params_current);
-            }
+        grad_norm = 0.0;
+        for(auto v: *gradient_current){
+            grad_norm += v * v;
+        }
+        grad_norm = std::sqrt(grad_norm);
 
-            total_error += mem * mem / train_size;
+        /* Update of the parameters */
+        /* step length calculation */
+        if(iter_idx < 10 || iter_idx % 100 == 0){
+            /* fixed step length */
+            gamma = 0.1 * accuracy;
         }
+        else{
 
 
 
-        /* error boundary condition: y(x, 0) = e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) => e2 = 1/n * (e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) - y(x, 0))^2 */
-        for(i = 0; i < data_points.size(); ++i){
+            /* norm of the gradient calculation */
 
-            x = data_points[i];
-            mem = (std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x ) - eval_approx_y(x, 0.0, n_inner_neurons, *params_current));
-            derror = 2.0 * mem / train_size;
+            sk = 0.0;
+            for(i = 0; i < gradient_current->size(); ++i){
+                sx = (*gradient_current)[i] - (*gradient_prev)[i];
+                sk += sx * sx;
+            }
+            sk = std::sqrt(sk);
 
-            for(j = 0; j < n_inner_neurons; ++j){
-                (*gradient_current)[4 * j + 0] -= derror * eval_approx_dwx_y(x, 0, j, *params_current);
-                (*gradient_current)[4 * j + 1] -= derror * eval_approx_dwt_y(x, 0, j, *params_current);
-                (*gradient_current)[4 * j + 2] -= derror *  eval_approx_da_y(x, 0, j, *params_current);
-                (*gradient_current)[4 * j + 3] -= derror *  eval_approx_db_y(x, 0, j, *params_current);
+            /* angle between two consecutive gradients */
+            double beta = 0.0, sx = 0.0;
+            for(i = 0; i < gradient_current->size(); ++i){
+                sx += (gradient_current->at( i ) * gradient_prev->at( i ));
             }
+            sx /= grad_norm * grad_norm_prev;
+            beta = std::sqrt(std::acos( sx ) / PI);
 
-            total_error += mem * mem / train_size;
-        }
 
-        /* error of the governing equation: y_xx - y_t = 0 => e3 = 1/n^2 * (0 - y_xx + y_t)^2 */
-        for(i = 0; i < data_points.size(); ++i){
-            x = data_points[i];
-            for(j = 0; j < data_points.size(); ++j){
-                t = data_points[j];
-
-                approx = eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current);
-                mem = 0.0 - approx;
-                derror = 2.0 * mem / (train_size * train_size);
-//                printf("%f\n", approx);
-//                printf("%f\n", derror);
-                for(k = 0; k < n_inner_neurons; ++k){
-//                    printf("%f -> ", (*gradient_current)[4 * k + 0]);
-                    (*gradient_current)[4 * k + 0] -= derror * (eval_approx_dwx_yxx(x, t, k, *params_current) - eval_approx_dwx_yt(x, t, k, *params_current));
-//                    printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 0], derror, eval_approx_dwx_yxx(x, t, k, *params_current), eval_approx_dwx_yt(x, t, k, *params_current));
-
-//                    printf("%f -> ", (*gradient_current)[4 * k + 1]);
-                    (*gradient_current)[4 * k + 1] -= derror * (eval_approx_dwt_yxx(x, t, k, *params_current) - eval_approx_dwt_yt(x, t, k, *params_current));
-//                    printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 1], derror, eval_approx_dwt_yxx(x, t, k, *params_current), eval_approx_dwt_yt(x, t, k, *params_current));
-
-//                    printf("%f -> ", (*gradient_current)[4 * k + 2]);
-                    (*gradient_current)[4 * k + 2] -= derror * ( eval_approx_da_yxx(x, t, k, *params_current) -  eval_approx_da_yt(x, t, k, *params_current));
-//                    printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 2], derror, eval_approx_da_yxx(x, t, k, *params_current), eval_approx_da_yt(x, t, k, *params_current));
-
-//                    printf("%f -> ", (*gradient_current)[4 * k + 3]);
-                    (*gradient_current)[4 * k + 3] -= derror * ( eval_approx_db_yxx(x, t, k, *params_current) -  eval_approx_db_yt(x, t, k, *params_current));
-//                    printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 3], derror, eval_approx_db_yxx(x, t, i, *params_current), eval_approx_db_yt(x, t, k, *params_current));
-
-                }
-                total_error += mem * mem / (train_size * train_size);
-            }
+//            eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev );
+            eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
         }
-        val = total_error;
 
-        /* Update of the parameters */
-        /* step length calculation */
-        if(iter_idx < 10){
-            /* fixed step length */
-            gamma = 0.1 * accuracy;
-        }
 
-        /* norm of the gradient calculation */
-        grad_norm_prev = grad_norm;
-        /* adaptive step-length */
-        sk = 0.0;
-        for(i = 0; i < gradient_current->size(); ++i){
-            sx = (*gradient_current)[i] - (*gradient_prev)[i];
-            sk += sx * sx;
-        }
-        sk = std::sqrt(sk);
 
-        grad_norm = 0.0;
-        for(auto v: *gradient_current){
-            grad_norm += v * v;
-        }
-        grad_norm = std::sqrt(grad_norm);
 
-        gamma = get_step_size_simple(gamma, val, prev_val, sk, grad_norm, grad_norm_prev);
 
 
         for(i = 0; i < gradient_current->size(); ++i){
@@ -445,11 +480,11 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t
 
 
         if(iter_idx % 1 == 0){
-            printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, grad_norm, total_error);
+            printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val);
             std::cout.flush();
         }
     }
-    printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, grad_norm, total_error);
+    printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val);
     std::cout.flush();
 
     for (i = 0; i < n_inner_neurons; ++i) {
@@ -461,15 +496,15 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t
         printf("Path %3d. wx = %15.8f, wt = %15.8f, b = %15.8f, a = %15.8f\n", (int)(i + 1), wxi, wti, bi, ai);
     }
 
-    for (i = 0; i < n_inner_neurons; ++i) {
-        wxi = (*params_current)[4 * i + 0];
-        wti = (*params_current)[4 * i + 1];
-        ai  = (*params_current)[4 * i + 2];
-        bi  = (*params_current)[4 * i + 3];
-
-        printf("%f/(1+e^(%f-%fx-%ft)) + ", (int)(i + 1), ai, bi, wxi, wti);
-    }
-    printf("\n");
+//    for (i = 0; i < n_inner_neurons; ++i) {
+//        wxi = (*params_current)[4 * i + 0];
+//        wti = (*params_current)[4 * i + 1];
+//        ai  = (*params_current)[4 * i + 2];
+//        bi  = (*params_current)[4 * i + 3];
+//
+//        printf("%f/(1+e^(%f-%fx-%ft)) + ", (int)(i + 1), ai, bi, wxi, wti);
+//    }
+//    printf("\n");
 
 
     /* SOLUTION EXPORT */
@@ -477,41 +512,92 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t
     std::cout.flush();
 
     std::vector<double> input, output(1);
-    std::ofstream ofs("data_2d_pde1_y.txt", std::ofstream::out);
+    std::ofstream ofs_y("data_2d_pde1_y.txt", std::ofstream::out);
     frac = (te - ts) / (n_test_points - 1);
     for(i = 0; i < n_test_points; ++i){
         x = i * frac + ts;
         for(j = 0; j < n_test_points; ++j){
             t = j * frac + ts;
-            ofs << x << " " << t << " " << eval_approx_y(x, t, n_inner_neurons, *params_current) << std::endl;
+            ofs_y << x << " " << t << " " << eval_approx_y(x, t, n_inner_neurons, *params_current) << std::endl;
             printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
             std::cout.flush();
         }
     }
     printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\n", 100.0);
     std::cout.flush();
-    ofs.close();
+    ofs_y.close();
+
+    printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\r", 0.0);
+    std::cout.flush();
+    std::ofstream ofs_t("data_2d_pde1_yt.txt", std::ofstream::out);
+    frac = (te - ts) / (n_test_points - 1);
+    for(i = 0; i < n_test_points; ++i){
+        x = i * frac + ts;
+        for(j = 0; j < n_test_points; ++j){
+            t = j * frac + ts;
+            ofs_t << x << " " << t << " " << eval_approx_yt(x, t, n_inner_neurons, *params_current) << std::endl;
+            printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
+            std::cout.flush();
+        }
+    }
+    printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\n", 100.0);
+    std::cout.flush();
+    ofs_t.close();
+
+    printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\r", 0.0);
+    std::cout.flush();
+    std::ofstream ofs_x("data_2d_pde1_yx.txt", std::ofstream::out);
+    frac = (te - ts) / (n_test_points - 1);
+    for(i = 0; i < n_test_points; ++i){
+        x = i * frac + ts;
+        for(j = 0; j < n_test_points; ++j){
+            t = j * frac + ts;
+            ofs_x << x << " " << t << " " << eval_approx_yx(x, t, n_inner_neurons, *params_current) << std::endl;
+            printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
+            std::cout.flush();
+        }
+    }
+    printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\n", 100.0);
+    std::cout.flush();
+    ofs_x.close();
+
+    printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\r", 0.0);
+    std::cout.flush();
+    std::ofstream ofs_xx("data_2d_pde1_yxx.txt", std::ofstream::out);
+    frac = (te - ts) / (n_test_points - 1);
+    for(i = 0; i < n_test_points; ++i){
+        x = i * frac + ts;
+        for(j = 0; j < n_test_points; ++j){
+            t = j * frac + ts;
+            ofs_xx << x << " " << t << " " << eval_approx_yxx(x, t, n_inner_neurons, *params_current) << std::endl;
+            printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
+            std::cout.flush();
+        }
+    }
+    printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\n", 100.0);
+    std::cout.flush();
+    ofs_xx.close();
 
     /* governing equation error */
-    ofs = std::ofstream("data_2d_pde1_first_equation_error.txt", std::ofstream::out);
+    std::ofstream ofs_error("data_2d_pde1_first_equation_error.txt", std::ofstream::out);
     printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", 0.0);
     for(i = 0; i < n_test_points; ++i){
         x = i * frac + ts;
         for(j = 0; j < n_test_points; ++j){
             t = j * frac + ts;
-            ofs << x << " " << t << " " << std::fabs(eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current)) << std::endl;
+            ofs_error << x << " " << t << " " << std::fabs(eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current)) << std::endl;
             printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1));
             std::cout.flush();
         }
     }
     printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\n", 100.0);
     std::cout.flush();
-    ofs.close();
+    ofs_error.close();
 
     /* ISOTROPIC TEST SET FOR BOUNDARY CONDITIONS */
     /* first boundary condition & its error */
-    ofs = std::ofstream("data_1d_pde1_yt.txt", std::ofstream::out);
-    std::ofstream ofs2("data_1d_pde1_yx.txt", std::ofstream::out);
+    std::ofstream ofs_bc_t("data_1d_pde1_yt.txt", std::ofstream::out);
+    std::ofstream ofs_bc_x("data_1d_pde1_yx.txt", std::ofstream::out);
     printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 0.0);
     for(i = 0; i < n_test_points; ++i){
         x = frac * i + ts;
@@ -523,16 +609,16 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t
         double evalt = eval_approx_y(0, t, n_inner_neurons, *params_current);
         double evalx = eval_approx_y(x, 0, n_inner_neurons, *params_current);
 
-        ofs << i + 1 << " " << t << " " << yt << " " << evalt << " " << std::fabs(evalt - yt) << std::endl;
-        ofs2 << i + 1 << " " << x << " " << yx << " " << evalx << " " << std::fabs(evalx - yx) << std::endl;
+        ofs_bc_t << i + 1 << " " << t << " " << yt << " " << evalt << " " << std::fabs(evalt - yt) << std::endl;
+        ofs_bc_x << i + 1 << " " << x << " " << yx << " " << evalx << " " << std::fabs(evalx - yx) << std::endl;
 
         printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", (100.0 * i) / (n_test_points - 1));
         std::cout.flush();
     }
     printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 100.0);
     std::cout.flush();
-    ofs2.close();
-    ofs.close();
+    ofs_bc_t.close();
+    ofs_bc_x.close();
 
     delete gradient_current;
     delete gradient_prev;
@@ -709,9 +795,9 @@ void solve_example_particle_swarm(double accuracy, size_t n_inner_neurons, size_
 
 int main() {
 
-    unsigned int n_inner_neurons = 2;
-    unsigned int train_size = 20;
-    double accuracy = 1e-4;
+    unsigned int n_inner_neurons = 3;
+    unsigned int train_size = 10;
+    double accuracy = 1e-5;
     double ds = 0.0;
     double de = 1.0;
 
-- 
GitLab