Commit 312a43ca authored by Michal Kravcenko

MOD: modified differential equation examples

parent 1a06e926
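An aside for context (not part of the commit): the code comments below reference the governing equation y''(x) + 4y'(x) + 4y(x) = 0 with the conditions y(0) = 1 and y'(0) = 1. Its closed-form solution, handy for checking the trained approximation, follows from the double root r = -2 of the characteristic polynomial (r + 2)^2 = 0:

y'' + 4y' + 4y = 0,\quad y(0) = 1,\ y'(0) = 1
\quad\Longrightarrow\quad y(x) = (1 + 3x)\, e^{-2x}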
@@ -232,7 +232,90 @@ double eval_error_function(std::vector<double> &parameters, size_t n_inner_neuro
return output;
}
void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, bool opti_w, bool opti_b, double d1_s, double d1_e,
void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
if(val > prev_val){
gamma *= 0.99999;
}
if(sk <= 1e-3 || grad_norm < grad_norm_prev){
/* movement on a line */
/* new slope is less steep, speed up */
gamma *= 1.0005;
}
else if(grad_norm > grad_norm_prev){
/* new slope is steeper, slow down */
gamma /= 1.0005;
}
else{
gamma /= 1.005;
}
// gamma *= 0.999999;
}
void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
if( fi > fim )
{
c /= 1.0000005;
}
else if( fi < fim )
{
c *= 1.0000005;
}
gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
}
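Written out (a restatement of the code above, nothing new): eval_step_size_mk updates the step length as

\gamma \leftarrow \gamma \; c^{\,1 - 2\beta} \left( \frac{\lVert g_{k-1} \rVert}{\lVert g_k \rVert} \right)^{1/c}

where g_k is the current gradient, beta is the normalized angle between consecutive gradients computed later in test_analytical_gradient_y, and the base c creeps down by the factor 1.0000005 when the objective rose (fi > fim) and up when it fell.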
double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
size_t i, j;
double x, mem, derror, total_error, approx;
size_t train_size = data_points.size();
/* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
x = 0.0;
mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters));
derror = 2.0 * mem;
total_error = mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * eval_approx_dw_f(x, i, *parameters);
(*gradient)[3 * i + 1] -= derror * eval_approx_da_f(x, i, *parameters);
(*gradient)[3 * i + 2] -= derror * eval_approx_db_f(x, i, *parameters);
}
/* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters));
derror = 2.0 * mem;
total_error += mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * eval_approx_dw_df(x, i, *parameters);
(*gradient)[3 * i + 1] -= derror * eval_approx_da_df(x, i, *parameters);
(*gradient)[3 * i + 2] -= derror * eval_approx_db_df(x, i, *parameters);
}
for(j = 0; j < data_points.size(); ++j){
x = data_points[j];
/* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
approx = eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters);
mem = 0.0 - approx;
derror = 2.0 * mem / train_size;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters));
(*gradient)[3 * i + 1] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters));
(*gradient)[3 * i + 2] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters));
}
total_error += mem * mem / train_size;
}
return total_error;
}
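For reference, the functional that calculate_gradient evaluates (and whose gradient with respect to each (w_i, a_i, b_i) triplet it accumulates into *gradient) is, in LaTeX,

E = \bigl(1 - y(0)\bigr)^2 + \bigl(1 - y'(0)\bigr)^2 + \frac{1}{n} \sum_{j=1}^{n} \bigl( y''(x_j) + 4 y'(x_j) + 4 y(x_j) \bigr)^2

with y the network output eval_approx_f and n = train_size; the returned total_error is E itself.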
void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double d1_s, double d1_e,
size_t test_size, double ts, double te) {
/* SETUP OF THE TRAINING DATA */
@@ -288,171 +371,60 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
}
gamma = 1.0;
double prev_val, val = 0.0, c = 2.0;
while( grad_norm > accuracy) {
iter_idx++;
prev_val = val;
grad_norm_prev = grad_norm;
/* current gradient */
/* reset of the current gradient */
std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current );
/* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
xj = 0.0;
mem = (1.0 - eval_approx_f(xj, n_inner_neurons, *params_current));
derror = 2.0 * mem;
total_error = mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= derror * eval_approx_dw_f(xj, i, *params_current);
(*gradient_current)[3 * i + 1] -= derror * eval_approx_da_f(xj, i, *params_current);
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= derror * eval_approx_db_f(xj, i, *params_current);
}
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
mem = (1.0 - eval_approx_df(xj, n_inner_neurons, *params_current));
derror = 2.0 * mem;
total_error += mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= derror * eval_approx_dw_df(xj, i, *params_current);
(*gradient_current)[3 * i + 1] -= derror * eval_approx_da_df(xj, i, *params_current);
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= derror * eval_approx_db_df(xj, i, *params_current);
}
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
for(j = 0; j < data_points.size(); ++j){
xj = data_points[j];
// xj = ds.get_data()->at(j).first[0];
/* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
approx = eval_approx_ddf(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current);
mem = 0.0 - approx;
error = 2.0 * mem / train_size;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= error * (eval_approx_dw_ddf(xj, i, *params_current) + 4.0 * eval_approx_dw_df(xj, i, *params_current) + 4.0 * eval_approx_dw_f(xj, i, *params_current));
(*gradient_current)[3 * i + 1] -= error * (eval_approx_da_ddf(xj, i, *params_current) + 4.0 * eval_approx_da_df(xj, i, *params_current) + 4.0 * eval_approx_da_f(xj, i, *params_current));
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= error * (eval_approx_db_ddf(xj, i, *params_current) + 4.0 * eval_approx_db_df(xj, i, *params_current) + 4.0 * eval_approx_db_f(xj, i, *params_current));
}
}
total_error += mem * mem / train_size;
grad_norm = 0.0;
for(auto v: *gradient_current){
grad_norm += v * v;
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* conjugate direction coefficient (Polak-Ribiere): <-grad_curr, -grad_curr + grad_prev> / <-grad_prev, -grad_prev> */
grad_norm = std::sqrt(grad_norm);
/* Update of the parameters */
if(iter_idx < 10 || iter_idx % 1 == 0){
for(i = 0; i < conjugate_direction_current->size(); ++i){
(*conjugate_direction_current)[i] = - (*gradient_current)[i];
}
}
else{
/* conjugate gradient */
sk = sy = 0.0;
for(i = 0; i < conjugate_direction_current->size(); ++i){
sk += (*gradient_current)[i] * ((*gradient_current)[i] - (*gradient_prev)[i]);
sy += (*gradient_prev)[i] * (*gradient_prev)[i];
}
beta = std::max(0.0, sk / sy);
/* update of the conjugate direction */
for(i = 0; i < conjugate_direction_current->size(); ++i){
(*conjugate_direction_current)[i] = beta * (*conjugate_direction_prev)[i] - (*gradient_current)[i];
}
}
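The else branch is the Polak-Ribiere coefficient named in the comment above; in the usual notation,

\beta_k = \max\!\left( 0, \; \frac{\langle g_k, \, g_k - g_{k-1} \rangle}{\langle g_{k-1}, \, g_{k-1} \rangle} \right), \qquad d_k = \beta_k d_{k-1} - g_k

where the max with zero is the standard PR+ safeguard that resets the search back toward steepest descent.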
/* step length calculation */
if(iter_idx < 10){
if(iter_idx < 10 || iter_idx % 100 == 0){
/* fixed step length */
gamma = 0.000001;
gamma = 0.1 * accuracy;
}
else{
// /* Barzilai-Borwein */
// sk = sy = 0.0;
//
// for(i = 0; i < gradient_current->size(); ++i){
// sx = (*params_current)[i] - (*params_prev)[i];
// sg = (*conjugate_direction_current)[i] - (*conjugate_direction_prev)[i];
//
// sk += sx * sg;
// sy += sg * sg;
// }
//
// gamma = -sk / sy;
/* norm of the gradient difference calculation */
sk = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx = (*gradient_current)[i] - (*gradient_prev)[i];
sk += sx * sx;
}
sk = std::sqrt(sk);
/* angle between two consecutive gradients */
double beta = 0.0, sx = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx += (gradient_current->at( i ) * gradient_prev->at( i ));
}
sx /= grad_norm * grad_norm_prev;
beta = std::sqrt(std::acos( sx ) / PI);
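Reading the two loops above off the code: sk is the norm of the gradient difference \lVert g_k - g_{k-1} \rVert, and beta is the angle between consecutive gradients, normalized into [0, 1],

\beta = \sqrt{ \frac{1}{\pi} \arccos \frac{\langle g_k, \, g_{k-1} \rangle}{\lVert g_k \rVert \, \lVert g_{k-1} \rVert} }

which feeds the exponent in eval_step_size_mk below.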
// /* Line search */
//
// gamma = line_search(10.0, conjugate_direction, *params_current, *gradient_current, n_inner_neurons, ds_00);
// eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev );
eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
}
// for(auto e: conjugate_direction){
// printf("[%10.8f]", e);
// }
// printf("\n");
//
// for(auto e: *params_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* norm of the gradient calculation */
grad_norm_prev = grad_norm;
/* adaptive step-length */
sk = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx = (*gradient_current)[i] - (*gradient_prev)[i];
sk += sx * sx;
}
sk = std::sqrt(sk);
if(sk <= 1e-3 || grad_norm < grad_norm_prev){
/* movement on a line */
/* new slope is less steep, speed up */
gamma *= 1.0005;
}
else if(grad_norm > grad_norm_prev){
/* new slope is steeper, slow down */
gamma /= 1.0005;
}
else{
gamma /= 1.005;
}
grad_norm = 0.0;
for(auto v: *gradient_current){
grad_norm += v * v;
}
grad_norm = std::sqrt(grad_norm);
for(i = 0; i < gradient_current->size(); ++i){
// (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
(*params_prev)[i] = (*params_current)[i] + gamma * (*conjugate_direction_current)[i];
(*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
}
// printf("\n");
/* switcheroo: swap the prev/current buffer pointers */
ptr_mem = gradient_prev;
@@ -463,34 +435,13 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
params_prev = params_current;
params_current = ptr_mem;
ptr_mem = conjugate_direction_prev;
conjugate_direction_prev = conjugate_direction_current;
conjugate_direction_current = ptr_mem;
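The three rotations through ptr_mem above are a double-buffer swap of the previous/current gradient, parameter, and conjugate-direction vectors. A minimal sketch of the same idiom (an illustration, not code from this commit), using std::swap:

#include <utility>
#include <vector>

/* Exchanges the two buffer pointers in O(1); the vector contents are never
   copied, which is the point of keeping prev/current as pointers. */
static void swap_buffers(std::vector<double> *&prev, std::vector<double> *&current) {
    std::swap(prev, current);
}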
// for (i = 0; i < n_inner_neurons; ++i) {
// wi = (*params_current)[3 * i];
// ai = (*params_current)[3 * i + 1];
// bi = (*params_current)[3 * i + 2];
//
// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai);
// }
if(iter_idx % 100 == 0){
printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f (%10.8f)\r", (int)iter_idx, gamma, grad_norm, total_error, eval_error_function(*params_prev, n_inner_neurons, data_points));
// for (i = 0; i < n_inner_neurons; ++i) {
// wi = (*params_current)[3 * i];
// ai = (*params_current)[3 * i + 1];
// bi = (*params_current)[3 * i + 2];
//
// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai);
// }
// std::cout << "-----------------------------" << std::endl;
if(iter_idx % 1 == 0){
printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val);
std::cout.flush();
}
}
printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n",(int) iter_idx, gamma, grad_norm, eval_error_function(*params_current, n_inner_neurons, data_points));
printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val);
std::cout.flush();
for (i = 0; i < n_inner_neurons; ++i) {
@@ -719,35 +670,25 @@ int main() {
double ts = ds;
double te = de + 2;
size_t particle_swarm_max_iters = 1000;
size_t n_particles = 100;
test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);
// size_t particle_swarm_max_iters = 1000;
// size_t n_particles = 100;
// test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);
// bool optimize_weights = true;
// bool optimize_biases = true;
//
//// std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
// std::vector<double> init_guess(3 * n_inner_neurons);
//
// std::random_device seeder;
// std::mt19937 gen(seeder());
// std::uniform_real_distribution<double> dist(-10.0, 10.0);
// for(unsigned int i = 0; i < init_guess.size(); ++i){
// init_guess[i] = dist(gen);
// }
// if(!optimize_biases){
// for(unsigned int i = 0; i < n_inner_neurons; ++i){
// init_guess[3 * i + 2] = 0.0;
// }
// }
// if(!optimize_weights){
// for(unsigned int i = 0; i < n_inner_neurons; ++i){
// init_guess[3 * i] = 0.0;
// init_guess[3 * i + 1] = 0.0;
// }
// }
//
// test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, optimize_weights, optimize_biases, ds, de, test_size, ts, te);
bool optimize_weights = true;
bool optimize_biases = true;
// std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
std::vector<double> init_guess(3 * n_inner_neurons);
std::random_device seeder;
std::mt19937 gen(seeder());
std::uniform_real_distribution<double> dist(-1.0, 1.0);
for(unsigned int i = 0; i < init_guess.size(); ++i){
init_guess[i] = dist(gen);
}
test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te);
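A note on the layout assumed here, inferred from the 3 * i, 3 * i + 1, 3 * i + 2 indexing in calculate_gradient: init_guess packs one (w_i, a_i, b_i) triplet per inner neuron, each entry drawn uniformly from [-1, 1]. A hypothetical accessor (not in the repository) illustrating that convention:

#include <cstddef>
#include <vector>

/* Hypothetical helper: unpacks the i-th neuron's (w, a, b) triplet from the
   flat parameter vector used throughout these examples. */
struct NeuronParams { double w, a, b; };

static NeuronParams neuron_params(const std::vector<double> &p, std::size_t i) {
    return { p[3 * i], p[3 * i + 1], p[3 * i + 2] };
}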
return 0;
......