From 312a43ca2a5e30db1cd9f1484d03c49cf820eb06 Mon Sep 17 00:00:00 2001
From: Michal Kravcenko <michal.kravcenko@vsb.cz>
Date: Tue, 14 Aug 2018 12:20:41 +0200
Subject: [PATCH] MOD: modified differential equation examples

---
 src/examples/net_test_ode_1.cpp | 327 +++++++++++----------------
 src/examples/net_test_pde_1.cpp | 380 ++++++++++++++++++++------------
 2 files changed, 367 insertions(+), 340 deletions(-)

diff --git a/src/examples/net_test_ode_1.cpp b/src/examples/net_test_ode_1.cpp
index 4d1e309f..c7b072d9 100644
--- a/src/examples/net_test_ode_1.cpp
+++ b/src/examples/net_test_ode_1.cpp
@@ -232,7 +232,90 @@ double eval_error_function(std::vector<double> &parameters, size_t n_inner_neuro
     return output;
 }

-void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, bool opti_w, bool opti_b, double d1_s, double d1_e,
+void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
+
+    if(val > prev_val){
+        gamma *= 0.99999;
+    }
+
+    if(sk <= 1e-3 || grad_norm < grad_norm_prev){
+        /* movement on a line */
+        /* new slope is less steep, speed up */
+        gamma *= 1.0005;
+    }
+    else if(grad_norm > grad_norm_prev){
+        /* new slope is more steep, slow down*/
+        gamma /= 1.0005;
+    }
+    else{
+        gamma /= 1.005;
+    }
+//    gamma *= 0.999999;
+}
+
+void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
+
+    if( fi > fim )
+    {
+        c /= 1.0000005;
+    }
+    else if( fi < fim )
+    {
+        c *= 1.0000005;
+    }
+
+    gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
+
+
+}
+
+
+double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
+    size_t i, j;
+    double x, mem, derror, total_error, approx;
+
+    size_t train_size = data_points.size();
+
+    /* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
+    x = 0.0;
+    mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters));
+    derror = 2.0 * mem;
+    total_error = mem * mem;
+    for(i = 0; i < n_inner_neurons; ++i){
+        (*gradient)[3 * i] -= derror * eval_approx_dw_f(x, i, *parameters);
+        (*gradient)[3 * i + 1] -= derror * eval_approx_da_f(x, i, *parameters);
+        (*gradient)[3 * i + 2] -= derror * eval_approx_db_f(x, i, *parameters);
+    }
+
+    /* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
+    mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters));
+    derror = 2.0 * mem;
+    total_error += mem * mem;
+    for(i = 0; i < n_inner_neurons; ++i){
+        (*gradient)[3 * i] -= derror * eval_approx_dw_df(x, i, *parameters);
+        (*gradient)[3 * i + 1] -= derror * eval_approx_da_df(x, i, *parameters);
+        (*gradient)[3 * i + 2] -= derror * eval_approx_db_df(x, i, *parameters);
+    }
+
+    for(j = 0; j < data_points.size(); ++j){
+        x = data_points[j];
+        /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
+        approx= eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters);
+        mem = 0.0 - approx;
+        derror = 2.0 * mem / train_size;
+        for(i = 0; i < n_inner_neurons; ++i){
+            (*gradient)[3 * i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters));
+            (*gradient)[3 * i + 1] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * 
eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters)); + (*gradient)[3 * i + 2] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters)); + } + total_error += mem * mem / train_size; + } + + return total_error; +} + + +void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double d1_s, double d1_e, size_t test_size, double ts, double te) { /* SETUP OF THE TRAINING DATA */ @@ -288,171 +371,60 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz } gamma = 1.0; + double prev_val, val = 0.0, c = 2.0; while( grad_norm > accuracy) { iter_idx++; + prev_val = val; + grad_norm_prev = grad_norm; - /* current gradient */ + /* reset of the current gradient */ std::fill(gradient_current->begin(), gradient_current->end(), 0.0); + val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current ); - /* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */ - xj = 0.0; - mem = (1.0 - eval_approx_f(xj, n_inner_neurons, *params_current)); - derror = 2.0 * mem; - total_error = mem * mem; - for(i = 0; i < n_inner_neurons; ++i){ - if(opti_w){ - (*gradient_current)[3 * i] -= derror * eval_approx_dw_f(xj, i, *params_current); - (*gradient_current)[3 * i + 1] -= derror * eval_approx_da_f(xj, i, *params_current); - } - if(opti_b){ - (*gradient_current)[3 * i + 2] -= derror * eval_approx_db_f(xj, i, *params_current); - } - } -// for(auto e: *gradient_current){ -// printf("[%10.8f]", e); -// } -// printf("\n"); - - /* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */ - mem = (1.0 - eval_approx_df(xj, n_inner_neurons, *params_current)); - derror = 2.0 * mem; - total_error += mem * mem; - for(i = 0; i < n_inner_neurons; ++i){ - if(opti_w){ - (*gradient_current)[3 * i] -= derror * eval_approx_dw_df(xj, i, *params_current); - (*gradient_current)[3 * i + 1] -= derror * eval_approx_da_df(xj, i, *params_current); - } - if(opti_b){ - (*gradient_current)[3 * i + 2] -= derror * eval_approx_db_df(xj, i, *params_current); - } - } -// for(auto e: *gradient_current){ -// printf("[%10.8f]", e); -// } -// printf("\n"); - - for(j = 0; j < data_points.size(); ++j){ - xj = data_points[j]; -// xj = ds.get_data()->at(j).first[0]; - /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */ - approx= eval_approx_ddf(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current); - mem = 0.0 - approx; - error = 2.0 * mem / train_size; - for(i = 0; i < n_inner_neurons; ++i){ - if(opti_w){ - (*gradient_current)[3 * i] -= error * (eval_approx_dw_ddf(xj, i, *params_current) + 4.0 * eval_approx_dw_df(xj, i, *params_current) + 4.0 * eval_approx_dw_f(xj, i, *params_current)); - (*gradient_current)[3 * i + 1] -= error * (eval_approx_da_ddf(xj, i, *params_current) + 4.0 * eval_approx_da_df(xj, i, *params_current) + 4.0 * eval_approx_da_f(xj, i, *params_current)); - } - if(opti_b){ - (*gradient_current)[3 * i + 2] -= error * (eval_approx_db_ddf(xj, i, *params_current) + 4.0 * eval_approx_db_df(xj, i, *params_current) + 4.0 * eval_approx_db_f(xj, i, *params_current)); - } - } - total_error += mem * mem / train_size; + grad_norm = 0.0; + for(auto v: *gradient_current){ + grad_norm += v * v; } - - -// for(auto e: *gradient_current){ -// printf("[%10.8f]", e); 
-// } -// printf("\n"); - - /* conjugate direction coefficient (Polak-Ribiere): <-grad_curr, -grad_curr + grad_prev> / <-grad_prev, -grad_prev >*/ + grad_norm = std::sqrt(grad_norm); /* Update of the parameters */ - - if(iter_idx < 10 || iter_idx % 1 == 0){ - for(i = 0; i < conjugate_direction_current->size(); ++i){ - (*conjugate_direction_current)[i] = - (*gradient_current)[i]; - } - } - else{ - /* conjugate gradient */ - sk = sy = 0.0; - for(i = 0; i < conjugate_direction_current->size(); ++i){ - sk += (*gradient_current)[i] * ((*gradient_current)[i] - (*gradient_prev)[i]); - sy += (*gradient_prev)[i] * (*gradient_prev)[i]; - } - beta = std::max(0.0, sk / sy); - - /* update of the conjugate direction */ - for(i = 0; i < conjugate_direction_current->size(); ++i){ - (*conjugate_direction_current)[i] = beta * (*conjugate_direction_prev)[i] - (*gradient_current)[i]; - } - } - /* step length calculation */ - if(iter_idx < 10){ + if(iter_idx < 10 || iter_idx % 100 == 0){ /* fixed step length */ - gamma = 0.000001; + gamma = 0.1 * accuracy; } else{ -// /* Barzilai-Borwein */ -// sk = sy = 0.0; -// -// for(i = 0; i < gradient_current->size(); ++i){ -// sx = (*params_current)[i] - (*params_prev)[i]; -// sg = (*conjugate_direction_current)[i] - (*conjugate_direction_prev)[i]; -// -// sk += sx * sg; -// sy += sg * sg; -// } -// -// gamma = -sk / sy; + /* norm of the gradient calculation */ + sk = 0.0; + for(i = 0; i < gradient_current->size(); ++i){ + sx = (*gradient_current)[i] - (*gradient_prev)[i]; + sk += sx * sx; + } + sk = std::sqrt(sk); + /* angle between two consecutive gradients */ + double beta = 0.0, sx = 0.0; + for(i = 0; i < gradient_current->size(); ++i){ + sx += (gradient_current->at( i ) * gradient_prev->at( i )); + } + sx /= grad_norm * grad_norm_prev; + beta = std::sqrt(std::acos( sx ) / PI); -// /* Line search */ -// -// gamma = line_search(10.0, conjugate_direction, *params_current, *gradient_current, n_inner_neurons, ds_00); + +// eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev ); + eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val ); } -// for(auto e: conjugate_direction){ -// printf("[%10.8f]", e); -// } -// printf("\n"); -// -// for(auto e: *params_current){ -// printf("[%10.8f]", e); -// } -// printf("\n"); - /* norm of the gradient calculation */ - grad_norm_prev = grad_norm; - /* adaptive step-length */ - sk = 0.0; - for(i = 0; i < gradient_current->size(); ++i){ - sx = (*gradient_current)[i] - (*gradient_prev)[i]; - sk += sx * sx; - } - sk = std::sqrt(sk); - if(sk <= 1e-3 || grad_norm < grad_norm_prev){ - /* movement on a line */ - /* new slope is less steep, speed up */ - gamma *= 1.0005; - } - else if(grad_norm > grad_norm_prev){ - /* new slope is more steep, slow down*/ - gamma /= 1.0005; - } - else{ - gamma /= 1.005; - } - grad_norm = 0.0; - for(auto v: *gradient_current){ - grad_norm += v * v; - } - grad_norm = std::sqrt(grad_norm); for(i = 0; i < gradient_current->size(); ++i){ -// (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i]; - (*params_prev)[i] = (*params_current)[i] + gamma * (*conjugate_direction_current)[i]; + (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i]; } -// printf("\n"); - /* switcheroo */ ptr_mem = gradient_prev; @@ -463,34 +435,13 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz params_prev = params_current; params_current = ptr_mem; - ptr_mem = conjugate_direction_prev; - conjugate_direction_prev = 
conjugate_direction_current; - conjugate_direction_current = ptr_mem; - - -// for (i = 0; i < n_inner_neurons; ++i) { -// wi = (*params_current)[3 * i]; -// ai = (*params_current)[3 * i + 1]; -// bi = (*params_current)[3 * i + 2]; -// -// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai); -// } - - if(iter_idx % 100 == 0){ - printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f (%10.8f)\r", (int)iter_idx, gamma, grad_norm, total_error, eval_error_function(*params_prev, n_inner_neurons, data_points)); -// for (i = 0; i < n_inner_neurons; ++i) { -// wi = (*params_current)[3 * i]; -// ai = (*params_current)[3 * i + 1]; -// bi = (*params_current)[3 * i + 2]; -// -// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai); -// } -// std::cout << "-----------------------------" << std::endl; + if(iter_idx % 1 == 0){ + printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val); std::cout.flush(); } } - printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n",(int) iter_idx, gamma, grad_norm, eval_error_function(*params_current, n_inner_neurons, data_points)); + printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val); std::cout.flush(); for (i = 0; i < n_inner_neurons; ++i) { @@ -719,35 +670,25 @@ int main() { double ts = ds; double te = de + 2; - size_t particle_swarm_max_iters = 1000; - size_t n_particles = 100; - test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles); +// size_t particle_swarm_max_iters = 1000; +// size_t n_particles = 100; +// test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles); -// bool optimize_weights = true; -// bool optimize_biases = true; -// -//// std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138}; -// std::vector<double> init_guess(3 * n_inner_neurons); -// -// std::random_device seeder; -// std::mt19937 gen(seeder()); -// std::uniform_real_distribution<double> dist(-10.0, 10.0); -// for(unsigned int i = 0; i < init_guess.size(); ++i){ -// init_guess[i] = dist(gen); -// } -// if(!optimize_biases){ -// for(unsigned int i = 0; i < n_inner_neurons; ++i){ -// init_guess[3 * i + 2] = 0.0; -// } -// } -// if(!optimize_weights){ -// for(unsigned int i = 0; i < n_inner_neurons; ++i){ -// init_guess[3 * i] = 0.0; -// init_guess[3 * i + 1] = 0.0; -// } -// } -// -// test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, optimize_weights, optimize_biases, ds, de, test_size, ts, te); + bool optimize_weights = true; + bool optimize_biases = true; + +// std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138}; + std::vector<double> init_guess(3 * n_inner_neurons); + + std::random_device seeder; + std::mt19937 gen(seeder()); + std::uniform_real_distribution<double> dist(-1.0, 1.0); + for(unsigned int i = 0; i < init_guess.size(); ++i){ + init_guess[i] = dist(gen); + } + + + test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te); return 0; diff --git a/src/examples/net_test_pde_1.cpp b/src/examples/net_test_pde_1.cpp index e85304fc..534369ae 100644 --- a/src/examples/net_test_pde_1.cpp +++ 
b/src/examples/net_test_pde_1.cpp
@@ -42,6 +42,59 @@ double eval_approx_y(double x, double t, size_t n_inner_neurons, std::vector<dou
     }
     return value;
 }
+//yt(x, t) = ... (ai * wti * e^(bi - wti * t - wxi * x))/(e^(bi - wti * t - wxi * x) + 1)^2
+double eval_approx_yt(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
+    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
+
+    for(size_t i = 0; i < n_inner_neurons; ++i){
+
+        wxi = parameters[4 * i + 0];
+        wti = parameters[4 * i + 1];
+        ai = parameters[4 * i + 2];
+        bi = parameters[4 * i + 3];
+
+        ei = std::pow(E, bi - wxi * x - wti * t);
+        ei1 = ei + 1.0;
+
+        value += ai * wti * ei / (ei1 * ei1);
+    }
+    return value;
+}
+//yx(x, t) = ... (ai * wxi * e^(bi - t * wti - wxi * x))/(e^(bi - t * wti - wxi * x) + 1)^2
+double eval_approx_yx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
+    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
+
+    for(size_t i = 0; i < n_inner_neurons; ++i){
+
+        wxi = parameters[4 * i + 0];
+        wti = parameters[4 * i + 1];
+        ai = parameters[4 * i + 2];
+        bi = parameters[4 * i + 3];
+
+        ei = std::pow(E, bi - wxi * x - wti * t);
+        ei1 = ei + 1.0;
+
+        value += (ai * wxi * ei)/(ei1 * ei1);
+    }
+    return value;
+}
+//yxx(x, t) = ... (ai * wxi^2 * e^(bi - t * wti - wxi * x) * (e^(bi - t * wti - wxi * x) - 1))/(e^(bi - t * wti - wxi * x) + 1)^3
+double eval_approx_yxx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
+    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
+    for(size_t i = 0; i < n_inner_neurons; ++i){
+
+        wxi = parameters[4 * i + 0];
+        wti = parameters[4 * i + 1];
+        ai = parameters[4 * i + 2];
+        bi = parameters[4 * i + 3];
+
+        ei = std::pow(E, bi - wxi * x - wti * t);
+        ei1 = ei + 1.0;
+
+        value += (2 * ai * wxi * wxi * ei * ei) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ei) / (ei1 * ei1);
+    }
+    return value;
+}

 double eval_approx_da_y(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
     double wxi, wti, bi, ei, ei1;
@@ -95,25 +148,6 @@ double eval_approx_db_y(double x, double t, size_t neuron_idx, std::vector<doubl
     return -(ai * ei)/(ei1 * ei1);
 }
-//yt(x, t) = ... (ai * wti * e^(bi - wti * t - wxi * x))/(e^(bi - wti * t - wxi * x) + 1)^2
-double eval_approx_yt(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
-    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
-
-    for(size_t i = 0; i < n_inner_neurons; ++i){
-
-        wxi = parameters[4 * i + 0];
-        wti = parameters[4 * i + 1];
-        ai = parameters[4 * i + 2];
-        bi = parameters[4 * i + 3];
-
-        ei = std::pow(E, bi - wxi * x - wti * t);
-        ei1 = ei + 1.0;
-
-        value += ai * wti * ei / (ei1 * ei1);
-    }
-    return value;
-}
-
 double eval_approx_da_yt(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
     double wxi, wti, bi, ei, ei1;
@@ -166,23 +200,6 @@ double eval_approx_db_yt(double x, double t, size_t neuron_idx, std::vector<doub
     return (ai * wti * ei) / (ei1 * ei1) - (2 * ai * wti * ei * ei) / (ei1 * ei1 * ei1);
 }
-double eval_approx_yxx(double x, double t, size_t n_inner_neurons, std::vector<double> &parameters){
-    double value= 0.0, wxi, wti, ai, bi, ei, ei1;
-    for(size_t i = 0; i < n_inner_neurons; ++i){
-
-        wxi = parameters[4 * i + 0];
-        wti = parameters[4 * i + 1];
-        ai = parameters[4 * i + 2];
-        bi = parameters[4 * i + 3];
-
-        ei = std::pow(E, bi - wxi * x - wti * t);
-        ei1 = ei + 1.0;
-
-        value += (2 * ai * wxi * wxi * ei * ei) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ei) / (ei1 * ei1);
-    }
-    return value;
-}
-
 double eval_approx_da_yxx(double x, double t, size_t neuron_idx, std::vector<double> &parameters){
     double wxi, wti, ai, bi, ei, ei1, ebp, eb, etx;
     wxi = parameters[4 * neuron_idx + 0];
@@ -247,7 +264,7 @@ double eval_approx_db_yxx(double x, double t, size_t neuron_idx, std::vector<dou
     return (ai * wxi * wxi * eb * ebp) / (ei1 * ei1 * ei1) - (ai * wxi * wxi * ebp * (etx - eb)) / (ei1 * ei1 * ei1) + (3 * ai * wxi * wxi * eb * ebp * (etx - eb)) / (ei1 * ei1 * ei1 * ei1);
 }

-double get_step_size_simple(double gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
+void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){

     if(val > prev_val){
         gamma *= 0.99999;
@@ -266,15 +283,85 @@ double get_step_size_simple(double gamma, double val, double prev_val, double sk
         gamma /= 1.005;
     }
 //    gamma *= 0.999999;
+}
+
+void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
+
+    if( fi > fim )
+    {
+        c /= 1.0000005;
+    }
+    else if( fi < fim )
+    {
+        c *= 1.0000005;
+    }
+    gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
-//    gamma = 0.000001;
-    return gamma;
 }
-double get_step_size_mk(double gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
+double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
+    size_t i, j, k;
+    double x, t, mem, derror, total_error = 0.0, approx;
+
+    size_t train_size = data_points.size();
+
+    /* error of boundary condition: y(0, t) = sin(t) => e1 = 1/n * (sin(t) - y(0, t))^2 */
+    for(i = 0; i < train_size; ++i){
+
+        t = data_points[i];
+        mem = (std::sin(t) - eval_approx_y(0.0, t, n_inner_neurons, *parameters));
+        derror = 2.0 * mem / train_size;
+
+        for(j = 0; j < n_inner_neurons; ++j){
+            (*gradient)[4 * j + 0] -= derror * eval_approx_dwx_y(0, t, j, *parameters);
+            (*gradient)[4 * j + 1] -= derror * eval_approx_dwt_y(0, t, j, *parameters);
+            (*gradient)[4 * j + 2] -= derror * eval_approx_da_y(0, t, j, *parameters);
+            (*gradient)[4 * j + 3] 
-= derror * eval_approx_db_y(0, t, j, *parameters); + } + + total_error += mem * mem / train_size; + } + + + + /* error boundary condition: y(x, 0) = e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) => e2 = 1/n * (e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) - y(x, 0))^2 */ + for(i = 0; i < train_size; ++i){ + + x = data_points[i]; + mem = (std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x ) - eval_approx_y(x, 0.0, n_inner_neurons, *parameters)); + derror = 2.0 * mem / train_size; + + for(j = 0; j < n_inner_neurons; ++j){ + (*gradient)[4 * j + 0] -= derror * eval_approx_dwx_y(x, 0, j, *parameters); + (*gradient)[4 * j + 1] -= derror * eval_approx_dwt_y(x, 0, j, *parameters); + (*gradient)[4 * j + 2] -= derror * eval_approx_da_y(x, 0, j, *parameters); + (*gradient)[4 * j + 3] -= derror * eval_approx_db_y(x, 0, j, *parameters); + } + + total_error += mem * mem / train_size; + } + /* error of the governing equation: y_xx - y_t = 0 => e3 = 1/n^2 * (0 - y_xx + y_t)^2 */ + for(i = 0; i < data_points.size(); ++i){ + x = data_points[i]; + for(j = 0; j < data_points.size(); ++j){ + t = data_points[j]; + + approx = eval_approx_yxx(x, t, n_inner_neurons, *parameters) - eval_approx_yt(x, t, n_inner_neurons, *parameters); + mem = 0.0 - approx; + derror = 2.0 * mem / (train_size * train_size); + for(k = 0; k < n_inner_neurons; ++k){ + (*gradient)[4 * k + 0] -= derror * (eval_approx_dwx_yxx(x, t, k, *parameters) - eval_approx_dwx_yt(x, t, k, *parameters)); + (*gradient)[4 * k + 1] -= derror * (eval_approx_dwt_yxx(x, t, k, *parameters) - eval_approx_dwt_yt(x, t, k, *parameters)); + (*gradient)[4 * k + 2] -= derror * ( eval_approx_da_yxx(x, t, k, *parameters) - eval_approx_da_yt(x, t, k, *parameters)); + (*gradient)[4 * k + 3] -= derror * ( eval_approx_db_yxx(x, t, k, *parameters) - eval_approx_db_yt(x, t, k, *parameters)); + } + total_error += mem * mem / (train_size * train_size); + } + } + return total_error; } void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double ds, double de, size_t n_test_points, double ts, double te){ @@ -324,110 +411,58 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t } gamma = 1.0; - double val = 0.0, prev_val; + double val = 0.0, prev_val, c = 2.0; prev_val = 0.0; while( grad_norm > accuracy) { iter_idx++; prev_val = val; - total_error = 0.0; + grad_norm_prev = grad_norm; /* reset of the current gradient */ std::fill(gradient_current->begin(), gradient_current->end(), 0.0); + val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current ); - /* error of boundary condition: y(0, t) = sin(t) => e1 = 1/n * (sin(t) - y(0, t))^2 */ - for(i = 0; i < data_points.size(); ++i){ - - t = data_points[i]; - mem = (std::sin(t) - eval_approx_y(0.0, t, n_inner_neurons, *params_current)); - derror = 2.0 * mem / train_size; - - for(j = 0; j < n_inner_neurons; ++j){ - (*gradient_current)[4 * j + 0] -= derror * eval_approx_dwx_y(0, t, j, *params_current); - (*gradient_current)[4 * j + 1] -= derror * eval_approx_dwt_y(0, t, j, *params_current); - (*gradient_current)[4 * j + 2] -= derror * eval_approx_da_y(0, t, j, *params_current); - (*gradient_current)[4 * j + 3] -= derror * eval_approx_db_y(0, t, j, *params_current); - } + grad_norm = 0.0; + for(auto v: *gradient_current){ + grad_norm += v * v; + } + grad_norm = std::sqrt(grad_norm); - total_error += mem * mem / train_size; + /* Update of the parameters */ + /* step length calculation */ + if(iter_idx < 10 || iter_idx % 100 == 
0){ + /* fixed step length */ + gamma = 0.1 * accuracy; } + else{ - /* error boundary condition: y(x, 0) = e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) => e2 = 1/n * (e^(-(0.5)^(0.5)x) * sin(-(0.5)^(0.5)x) - y(x, 0))^2 */ - for(i = 0; i < data_points.size(); ++i){ + /* norm of the gradient calculation */ - x = data_points[i]; - mem = (std::pow(E, -0.707106781 * x) * std::sin( -0.707106781 * x ) - eval_approx_y(x, 0.0, n_inner_neurons, *params_current)); - derror = 2.0 * mem / train_size; + sk = 0.0; + for(i = 0; i < gradient_current->size(); ++i){ + sx = (*gradient_current)[i] - (*gradient_prev)[i]; + sk += sx * sx; + } + sk = std::sqrt(sk); - for(j = 0; j < n_inner_neurons; ++j){ - (*gradient_current)[4 * j + 0] -= derror * eval_approx_dwx_y(x, 0, j, *params_current); - (*gradient_current)[4 * j + 1] -= derror * eval_approx_dwt_y(x, 0, j, *params_current); - (*gradient_current)[4 * j + 2] -= derror * eval_approx_da_y(x, 0, j, *params_current); - (*gradient_current)[4 * j + 3] -= derror * eval_approx_db_y(x, 0, j, *params_current); + /* angle between two consecutive gradients */ + double beta = 0.0, sx = 0.0; + for(i = 0; i < gradient_current->size(); ++i){ + sx += (gradient_current->at( i ) * gradient_prev->at( i )); } + sx /= grad_norm * grad_norm_prev; + beta = std::sqrt(std::acos( sx ) / PI); - total_error += mem * mem / train_size; - } - /* error of the governing equation: y_xx - y_t = 0 => e3 = 1/n^2 * (0 - y_xx + y_t)^2 */ - for(i = 0; i < data_points.size(); ++i){ - x = data_points[i]; - for(j = 0; j < data_points.size(); ++j){ - t = data_points[j]; - - approx = eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current); - mem = 0.0 - approx; - derror = 2.0 * mem / (train_size * train_size); -// printf("%f\n", approx); -// printf("%f\n", derror); - for(k = 0; k < n_inner_neurons; ++k){ -// printf("%f -> ", (*gradient_current)[4 * k + 0]); - (*gradient_current)[4 * k + 0] -= derror * (eval_approx_dwx_yxx(x, t, k, *params_current) - eval_approx_dwx_yt(x, t, k, *params_current)); -// printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 0], derror, eval_approx_dwx_yxx(x, t, k, *params_current), eval_approx_dwx_yt(x, t, k, *params_current)); - -// printf("%f -> ", (*gradient_current)[4 * k + 1]); - (*gradient_current)[4 * k + 1] -= derror * (eval_approx_dwt_yxx(x, t, k, *params_current) - eval_approx_dwt_yt(x, t, k, *params_current)); -// printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 1], derror, eval_approx_dwt_yxx(x, t, k, *params_current), eval_approx_dwt_yt(x, t, k, *params_current)); - -// printf("%f -> ", (*gradient_current)[4 * k + 2]); - (*gradient_current)[4 * k + 2] -= derror * ( eval_approx_da_yxx(x, t, k, *params_current) - eval_approx_da_yt(x, t, k, *params_current)); -// printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 2], derror, eval_approx_da_yxx(x, t, k, *params_current), eval_approx_da_yt(x, t, k, *params_current)); - -// printf("%f -> ", (*gradient_current)[4 * k + 3]); - (*gradient_current)[4 * k + 3] -= derror * ( eval_approx_db_yxx(x, t, k, *params_current) - eval_approx_db_yt(x, t, k, *params_current)); -// printf("%f (%f * (%f - %f))\n", (*gradient_current)[4 * k + 3], derror, eval_approx_db_yxx(x, t, i, *params_current), eval_approx_db_yt(x, t, k, *params_current)); - - } - total_error += mem * mem / (train_size * train_size); - } +// eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev ); + eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, 
prev_val ); } - val = total_error; - /* Update of the parameters */ - /* step length calculation */ - if(iter_idx < 10){ - /* fixed step length */ - gamma = 0.1 * accuracy; - } - /* norm of the gradient calculation */ - grad_norm_prev = grad_norm; - /* adaptive step-length */ - sk = 0.0; - for(i = 0; i < gradient_current->size(); ++i){ - sx = (*gradient_current)[i] - (*gradient_prev)[i]; - sk += sx * sx; - } - sk = std::sqrt(sk); - grad_norm = 0.0; - for(auto v: *gradient_current){ - grad_norm += v * v; - } - grad_norm = std::sqrt(grad_norm); - gamma = get_step_size_simple(gamma, val, prev_val, sk, grad_norm, grad_norm_prev); for(i = 0; i < gradient_current->size(); ++i){ @@ -445,11 +480,11 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t if(iter_idx % 1 == 0){ - printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, grad_norm, total_error); + printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val); std::cout.flush(); } } - printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, grad_norm, total_error); + printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val); std::cout.flush(); for (i = 0; i < n_inner_neurons; ++i) { @@ -461,15 +496,15 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t printf("Path %3d. wx = %15.8f, wt = %15.8f, b = %15.8f, a = %15.8f\n", (int)(i + 1), wxi, wti, bi, ai); } - for (i = 0; i < n_inner_neurons; ++i) { - wxi = (*params_current)[4 * i + 0]; - wti = (*params_current)[4 * i + 1]; - ai = (*params_current)[4 * i + 2]; - bi = (*params_current)[4 * i + 3]; - - printf("%f/(1+e^(%f-%fx-%ft)) + ", (int)(i + 1), ai, bi, wxi, wti); - } - printf("\n"); +// for (i = 0; i < n_inner_neurons; ++i) { +// wxi = (*params_current)[4 * i + 0]; +// wti = (*params_current)[4 * i + 1]; +// ai = (*params_current)[4 * i + 2]; +// bi = (*params_current)[4 * i + 3]; +// +// printf("%f/(1+e^(%f-%fx-%ft)) + ", (int)(i + 1), ai, bi, wxi, wti); +// } +// printf("\n"); /* SOLUTION EXPORT */ @@ -477,41 +512,92 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t std::cout.flush(); std::vector<double> input, output(1); - std::ofstream ofs("data_2d_pde1_y.txt", std::ofstream::out); + std::ofstream ofs_y("data_2d_pde1_y.txt", std::ofstream::out); frac = (te - ts) / (n_test_points - 1); for(i = 0; i < n_test_points; ++i){ x = i * frac + ts; for(j = 0; j < n_test_points; ++j){ t = j * frac + ts; - ofs << x << " " << t << " " << eval_approx_y(x, t, n_inner_neurons, *params_current) << std::endl; + ofs_y << x << " " << t << " " << eval_approx_y(x, t, n_inner_neurons, *params_current) << std::endl; printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1)); std::cout.flush(); } } printf("Exporting file 'data_2d_pde1_y.txt' : %7.3f%%\n", 100.0); std::cout.flush(); - ofs.close(); + ofs_y.close(); + + printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\r", 0.0); + std::cout.flush(); + std::ofstream ofs_t("data_2d_pde1_yt.txt", std::ofstream::out); + frac = (te - ts) / (n_test_points - 1); + for(i = 0; i < n_test_points; ++i){ + x = i * frac + ts; + for(j = 0; j < n_test_points; ++j){ + t = j * frac + ts; + ofs_t << x << " " << t << " " << 
eval_approx_yt(x, t, n_inner_neurons, *params_current) << std::endl; + printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1)); + std::cout.flush(); + } + } + printf("Exporting file 'data_2d_pde1_yt.txt' : %7.3f%%\n", 100.0); + std::cout.flush(); + ofs_t.close(); + + printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\r", 0.0); + std::cout.flush(); + std::ofstream ofs_x("data_2d_pde1_yx.txt", std::ofstream::out); + frac = (te - ts) / (n_test_points - 1); + for(i = 0; i < n_test_points; ++i){ + x = i * frac + ts; + for(j = 0; j < n_test_points; ++j){ + t = j * frac + ts; + ofs_x << x << " " << t << " " << eval_approx_yx(x, t, n_inner_neurons, *params_current) << std::endl; + printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1)); + std::cout.flush(); + } + } + printf("Exporting file 'data_2d_pde1_yx.txt' : %7.3f%%\n", 100.0); + std::cout.flush(); + ofs_x.close(); + + printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\r", 0.0); + std::cout.flush(); + std::ofstream ofs_xx("data_2d_pde1_yxx.txt", std::ofstream::out); + frac = (te - ts) / (n_test_points - 1); + for(i = 0; i < n_test_points; ++i){ + x = i * frac + ts; + for(j = 0; j < n_test_points; ++j){ + t = j * frac + ts; + ofs_xx << x << " " << t << " " << eval_approx_yxx(x, t, n_inner_neurons, *params_current) << std::endl; + printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1)); + std::cout.flush(); + } + } + printf("Exporting file 'data_2d_pde1_yxx.txt' : %7.3f%%\n", 100.0); + std::cout.flush(); + ofs_xx.close(); /* governing equation error */ - ofs = std::ofstream("data_2d_pde1_first_equation_error.txt", std::ofstream::out); + std::ofstream ofs_error("data_2d_pde1_first_equation_error.txt", std::ofstream::out); printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", 0.0); for(i = 0; i < n_test_points; ++i){ x = i * frac + ts; for(j = 0; j < n_test_points; ++j){ t = j * frac + ts; - ofs << x << " " << t << " " << std::fabs(eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current)) << std::endl; + ofs_error << x << " " << t << " " << std::fabs(eval_approx_yxx(x, t, n_inner_neurons, *params_current) - eval_approx_yt(x, t, n_inner_neurons, *params_current)) << std::endl; printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\r", (100.0 * (j + i * n_test_points)) / (n_test_points * n_test_points - 1)); std::cout.flush(); } } printf("Exporting file 'data_2d_pde1_first_equation_error.txt' : %7.3f%%\n", 100.0); std::cout.flush(); - ofs.close(); + ofs_error.close(); /* ISOTROPIC TEST SET FOR BOUNDARY CONDITIONS */ /* first boundary condition & its error */ - ofs = std::ofstream("data_1d_pde1_yt.txt", std::ofstream::out); - std::ofstream ofs2("data_1d_pde1_yx.txt", std::ofstream::out); + std::ofstream ofs_bc_t("data_1d_pde1_yt.txt", std::ofstream::out); + std::ofstream ofs_bc_x("data_1d_pde1_yx.txt", std::ofstream::out); printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 0.0); for(i = 0; i < n_test_points; ++i){ x = frac * i + ts; @@ -523,16 +609,16 @@ void solve_example_gradient(std::vector<double> &guess, double accuracy, size_t double evalt = eval_approx_y(0, t, n_inner_neurons, *params_current); double evalx = eval_approx_y(x, 0, n_inner_neurons, *params_current); - ofs << i + 1 
<< " " << t << " " << yt << " " << evalt << " " << std::fabs(evalt - yt) << std::endl; - ofs2 << i + 1 << " " << x << " " << yx << " " << evalx << " " << std::fabs(evalx - yx) << std::endl; + ofs_bc_t << i + 1 << " " << t << " " << yt << " " << evalt << " " << std::fabs(evalt - yt) << std::endl; + ofs_bc_x << i + 1 << " " << x << " " << yx << " " << evalx << " " << std::fabs(evalx - yx) << std::endl; printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", (100.0 * i) / (n_test_points - 1)); std::cout.flush(); } printf("Exporting files 'data_1d_pde1_yt.txt' and 'data_1d_pde1_yx.txt' : %7.3f%%\r", 100.0); std::cout.flush(); - ofs2.close(); - ofs.close(); + ofs_bc_t.close(); + ofs_bc_x.close(); delete gradient_current; delete gradient_prev; @@ -709,9 +795,9 @@ void solve_example_particle_swarm(double accuracy, size_t n_inner_neurons, size_ int main() { - unsigned int n_inner_neurons = 2; - unsigned int train_size = 20; - double accuracy = 1e-4; + unsigned int n_inner_neurons = 3; + unsigned int train_size = 10; + double accuracy = 1e-5; double ds = 0.0; double de = 1.0; -- GitLab
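Note for reviewers (not part of the patch): both examples now drive plain gradient descent with the new eval_step_size_mk() heuristic instead of the previous conjugate-gradient update. The snippet below is a minimal, self-contained probe of that heuristic. It copies eval_step_size_mk() verbatim from this patch and feeds it hand-picked values of beta (the normalized angle between two consecutive gradients, 0 = aligned, 1 = opposite), a fixed gradient-norm ratio, and a decreasing error value, so the reaction of the step length gamma can be inspected in isolation. The harness (main, the chosen constants) is made up for illustration only.

#include <cmath>
#include <cstdio>

void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
    /* copied from the patch: c is nudged according to whether the error grew (fi > fim) or shrank (fi < fim) */
    if( fi > fim )
    {
        c /= 1.0000005;
    }
    else if( fi < fim )
    {
        c *= 1.0000005;
    }
    /* small beta (straight path) grows gamma, beta near 1 (zig-zag) shrinks it;
       a shrinking gradient norm (grad_norm_prev / grad_norm > 1) also grows it */
    gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
}

int main() {
    const double betas[] = {0.0, 0.25, 0.5, 0.75, 1.0};
    for(double beta : betas){
        double gamma = 1e-3;   /* starting step length, arbitrary for the probe */
        double c = 2.0;        /* starting value used by both examples */
        /* gradient norm dropped from 1.25 to 1.0 and the error dropped from 1.0 to 0.9 */
        eval_step_size_mk( gamma, beta, c, 1.25, 1.0, 0.9, 1.0 );
        std::printf("beta = %4.2f -> gamma = %10.8f, c = %10.8f\n", beta, gamma, c);
    }
    return 0;
}

With these inputs the printed gamma grows for beta close to 0 and shrinks for beta close to 1, which matches how the examples use it: in the descent loops beta is computed as sqrt(acos(<g_k, g_{k-1}> / (|g_k| |g_{k-1}|)) / PI) from the two most recent gradients.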
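Also for reviewers (not part of the patch): both modified examples state their governing equations and boundary conditions in comments, and both problems have simple closed-form solutions that the exported data files can be checked against. For the ODE example, y'' + 4y' + 4y = 0 with y(0) = 1 and y'(0) = 1 gives y(x) = (1 + 3x) * e^(-2x). For the PDE example, y_xx - y_t = 0 with y(0, t) = sin(t) and y(x, 0) = e^(-x/sqrt(2)) * sin(-x/sqrt(2)) gives y(x, t) = e^(-x/sqrt(2)) * sin(t - x/sqrt(2)). The helper below is an illustrative sketch only; the function names and sample points are made up.

#include <cmath>
#include <cstdio>

/* closed-form solution of the ODE example in net_test_ode_1.cpp */
double ode_exact(double x) {
    return (1.0 + 3.0 * x) * std::exp(-2.0 * x);
}

/* closed-form solution of the PDE example in net_test_pde_1.cpp */
double pde_exact(double x, double t) {
    const double s = 1.0 / std::sqrt(2.0);
    return std::exp(-s * x) * std::sin(t - s * x);
}

int main() {
    /* arbitrary sample points, just something to compare against the exported files */
    for(int i = 0; i <= 10; ++i){
        double x = 0.4 * i;
        std::printf("ode  x = %5.2f  y = %12.8f\n", x, ode_exact(x));
    }
    for(int i = 0; i <= 10; ++i){
        double x = 0.1 * i;
        std::printf("pde  x = t = %5.2f  y = %12.8f\n", x, pde_exact(x, x));
    }
    return 0;
}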