Commit 312a43ca authored by Michal Kravcenko

MOD: modified differential equation examples

parent 1a06e926
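An aside for context (not part of the commit): the code comments below reference the governing equation y''(x) + 4y'(x) + 4y(x) = 0 with the conditions y(0) = 1 and y'(0) = 1. Its closed-form solution, handy for checking the trained approximation, follows from the double root r = -2 of the characteristic polynomial (r + 2)^2 = 0:

y'' + 4y' + 4y = 0,\quad y(0) = 1,\ y'(0) = 1
\quad\Longrightarrow\quad y(x) = (1 + 3x)\, e^{-2x}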
@@ -232,7 +232,90 @@ double eval_error_function(std::vector<double> &parameters, size_t n_inner_neuro
return output;
}
void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, bool opti_w, bool opti_b, double d1_s, double d1_e,
void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
if(val > prev_val){
gamma *= 0.99999;
}
if(sk <= 1e-3 || grad_norm < grad_norm_prev){
/* movement on a line */
/* new slope is less steep, speed up */
gamma *= 1.0005;
}
else if(grad_norm > grad_norm_prev){
/* new slope is steeper, slow down */
gamma /= 1.0005;
}
else{
gamma /= 1.005;
}
// gamma *= 0.999999;
}
void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
if( fi > fim )
{
c /= 1.0000005;
}
else if( fi < fim )
{
c *= 1.0000005;
}
gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
}
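Written out (a restatement of the code above, nothing new): eval_step_size_mk updates the step length as

\gamma \leftarrow \gamma \; c^{\,1 - 2\beta} \left( \frac{\lVert g_{k-1} \rVert}{\lVert g_k \rVert} \right)^{1/c}

where g_k is the current gradient, beta is the normalized angle between consecutive gradients computed later in test_analytical_gradient_y, and the base c creeps down by the factor 1.0000005 when the objective rose (fi > fim) and up when it fell.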
double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
size_t i, j;
double x, mem, derror, total_error, approx;
size_t train_size = data_points.size();
/* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
x = 0.0;
mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters));
derror = 2.0 * mem;
total_error = mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * eval_approx_dw_f(x, i, *parameters);
(*gradient)[3 * i + 1] -= derror * eval_approx_da_f(x, i, *parameters);
(*gradient)[3 * i + 2] -= derror * eval_approx_db_f(x, i, *parameters);
}
/* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters));
derror = 2.0 * mem;
total_error += mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * eval_approx_dw_df(x, i, *parameters);
(*gradient)[3 * i + 1] -= derror * eval_approx_da_df(x, i, *parameters);
(*gradient)[3 * i + 2] -= derror * eval_approx_db_df(x, i, *parameters);
}
for(j = 0; j < data_points.size(); ++j){
x = data_points[j];
/* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
approx = eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters);
mem = 0.0 - approx;
derror = 2.0 * mem / train_size;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters));
(*gradient)[3 * i + 1] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters));
(*gradient)[3 * i + 2] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters));
}
total_error += mem * mem / train_size;
}
return total_error;
}
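For reference, the functional that calculate_gradient evaluates (and whose gradient with respect to each (w_i, a_i, b_i) triplet it accumulates into *gradient) is, in LaTeX,

E = \bigl(1 - y(0)\bigr)^2 + \bigl(1 - y'(0)\bigr)^2 + \frac{1}{n} \sum_{j=1}^{n} \bigl( y''(x_j) + 4 y'(x_j) + 4 y(x_j) \bigr)^2

with y the network output eval_approx_f and n = train_size; the returned total_error is E itself.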
void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double d1_s, double d1_e,
size_t test_size, double ts, double te) {
/* SETUP OF THE TRAINING DATA */
@@ -288,171 +371,60 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
}
gamma = 1.0;
double prev_val, val = 0.0, c = 2.0;
while( grad_norm > accuracy) {
iter_idx++;
prev_val = val;
grad_norm_prev = grad_norm;
/* current gradient */
/* reset of the current gradient */
std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current );
/* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
xj = 0.0;
mem = (1.0 - eval_approx_f(xj, n_inner_neurons, *params_current));
derror = 2.0 * mem;
total_error = mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= derror * eval_approx_dw_f(xj, i, *params_current);
(*gradient_current)[3 * i + 1] -= derror * eval_approx_da_f(xj, i, *params_current);
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= derror * eval_approx_db_f(xj, i, *params_current);
}
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
mem = (1.0 - eval_approx_df(xj, n_inner_neurons, *params_current));
derror = 2.0 * mem;
total_error += mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= derror * eval_approx_dw_df(xj, i, *params_current);
(*gradient_current)[3 * i + 1] -= derror * eval_approx_da_df(xj, i, *params_current);
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= derror * eval_approx_db_df(xj, i, *params_current);
}
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
for(j = 0; j < data_points.size(); ++j){
xj = data_points[j];
// xj = ds.get_data()->at(j).first[0];
/* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
approx = eval_approx_ddf(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current);
mem = 0.0 - approx;
error = 2.0 * mem / train_size;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= error * (eval_approx_dw_ddf(xj, i, *params_current) + 4.0 * eval_approx_dw_df(xj, i, *params_current) + 4.0 * eval_approx_dw_f(xj, i, *params_current));
(*gradient_current)[3 * i + 1] -= error * (eval_approx_da_ddf(xj, i, *params_current) + 4.0 * eval_approx_da_df(xj, i, *params_current) + 4.0 * eval_approx_da_f(xj, i, *params_current));
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= error * (eval_approx_db_ddf(xj, i, *params_current) + 4.0 * eval_approx_db_df(xj, i, *params_current) + 4.0 * eval_approx_db_f(xj, i, *params_current));
}
}
total_error += mem * mem / train_size;
grad_norm = 0.0;
for(auto v: *gradient_current){
grad_norm += v * v;
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* conjugate direction coefficient (Polak-Ribiere): <-grad_curr, -grad_curr + grad_prev> / <-grad_prev, -grad_prev> */
grad_norm = std::sqrt(grad_norm);
/* Update of the parameters */
if(iter_idx < 10 || iter_idx % 1 == 0){
for(i = 0; i < conjugate_direction_current->size(); ++i){
(*conjugate_direction_current)[i] = - (*gradient_current)[i];
}
}
else{
/* conjugate gradient */
sk = sy = 0.0;
for(i = 0; i < conjugate_direction_current->size(); ++i){
sk += (*gradient_current)[i] * ((*gradient_current)[i] - (*gradient_prev)[i]);
sy += (*gradient_prev)[i] * (*gradient_prev)[i];
}
beta = std::max(0.0, sk / sy);
/* update of the conjugate direction */
for(i = 0; i < conjugate_direction_current->size(); ++i){
(*conjugate_direction_current)[i] = beta * (*conjugate_direction_prev)[i] - (*gradient_current)[i];
}
}
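The else branch is the Polak-Ribiere coefficient named in the comment above; in the usual notation,

\beta_k = \max\!\left( 0, \; \frac{\langle g_k, \, g_k - g_{k-1} \rangle}{\langle g_{k-1}, \, g_{k-1} \rangle} \right), \qquad d_k = \beta_k d_{k-1} - g_k

where the max with zero is the standard PR+ safeguard that resets the search back toward steepest descent.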
/* step length calculation */
if(iter_idx < 10){
if(iter_idx < 10 || iter_idx % 100 == 0){
/* fixed step length */
gamma = 0.000001;
gamma = 0.1 * accuracy;
}
else{
// /* Barzilai-Borwein */
// sk = sy = 0.0;
//
// for(i = 0; i < gradient_current->size(); ++i){
// sx = (*params_current)[i] - (*params_prev)[i];
// sg = (*conjugate_direction_current)[i] - (*conjugate_direction_prev)[i];
//
// sk += sx * sg;
// sy += sg * sg;
// }
//
// gamma = -sk / sy;
/* norm of the gradient difference calculation */
sk = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx = (*gradient_current)[i] - (*gradient_prev)[i];
sk += sx * sx;
}
sk = std::sqrt(sk);
/* angle between two consecutive gradients */
double beta = 0.0, sx = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx += (gradient_current->at( i ) * gradient_prev->at( i ));
}
sx /= grad_norm * grad_norm_prev;
beta = std::sqrt(std::acos( sx ) / PI);
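Reading the two loops above off the code: sk is the norm of the gradient difference \lVert g_k - g_{k-1} \rVert, and beta is the angle between consecutive gradients, normalized into [0, 1],

\beta = \sqrt{ \frac{1}{\pi} \arccos \frac{\langle g_k, \, g_{k-1} \rangle}{\lVert g_k \rVert \, \lVert g_{k-1} \rVert} }

which feeds the exponent in eval_step_size_mk below.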
// /* Line search */
//
// gamma = line_search(10.0, conjugate_direction, *params_current, *gradient_current, n_inner_neurons, ds_00);
// eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev );
eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
}
// for(auto e: conjugate_direction){
// printf("[%10.8f]", e);
// }
// printf("\n");
//
// for(auto e: *params_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* norm of the gradient calculation */
grad_norm_prev = grad_norm;
/* adaptive step-length */
sk = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx = (*gradient_current)[i] - (*gradient_prev)[i];
sk += sx * sx;
}
sk = std::sqrt(sk);
if(sk <= 1e-3 || grad_norm < grad_norm_prev){
/* movement on a line */
/* new slope is less steep, speed up */
gamma *= 1.0005;
}
else if(grad_norm > grad_norm_prev){
/* new slope is steeper, slow down */
gamma /= 1.0005;
}
else{
gamma /= 1.005;
}
grad_norm = 0.0;
for(auto v: *gradient_current){
grad_norm += v * v;
}
grad_norm = std::sqrt(grad_norm);
for(i = 0; i < gradient_current->size(); ++i){
// (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
(*params_prev)[i] = (*params_current)[i] + gamma * (*conjugate_direction_current)[i];
(*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
}
// printf("\n");
/* switcheroo: swap the prev/current buffer pointers */
ptr_mem = gradient_prev;
@@ -463,34 +435,13 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
params_prev = params_current;
params_current = ptr_mem;
ptr_mem = conjugate_direction_prev;
conjugate_direction_prev = conjugate_direction_current;
conjugate_direction_current = ptr_mem;
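The three rotations through ptr_mem above are a double-buffer swap of the previous/current gradient, parameter, and conjugate-direction vectors. A minimal sketch of the same idiom (an illustration, not code from this commit), using std::swap:

#include <utility>
#include <vector>

/* Exchanges the two buffer pointers in O(1); the vector contents are never
   copied, which is the point of keeping prev/current as pointers. */
static void swap_buffers(std::vector<double> *&prev, std::vector<double> *&current) {
    std::swap(prev, current);
}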
// for (i = 0; i < n_inner_neurons; ++i) {
// wi = (*params_current)[3 * i];
// ai = (*params_current)[3 * i + 1];
// bi = (*params_current)[3 * i + 2];
//
// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai);
// }
if(iter_idx % 100 == 0){
printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f (%10.8f)\r", (int)iter_idx, gamma, grad_norm, total_error, eval_error_function(*params_prev, n_inner_neurons, data_points));
// for (i = 0; i < n_inner_neurons; ++i) {
// wi = (*params_current)[3 * i];
// ai = (*params_current)[3 * i + 1];
// bi = (*params_current)[3 * i + 2];
//
// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", i + 1, wi, bi, ai);
// }
// std::cout << "-----------------------------" << std::endl;
if(iter_idx % 1 == 0){
printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val);
std::cout.flush();
}
}
printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n",(int) iter_idx, gamma, grad_norm, eval_error_function(*params_current, n_inner_neurons, data_points));
printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r\n", (int)iter_idx, gamma, c, grad_norm, val);
std::cout.flush();
for (i = 0; i < n_inner_neurons; ++i) {
@@ -719,35 +670,25 @@ int main() {
double ts = ds;
double te = de + 2;
size_t particle_swarm_max_iters = 1000;
size_t n_particles = 100;
test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);
// size_t particle_swarm_max_iters = 1000;
// size_t n_particles = 100;
// test_ode(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);
// bool optimize_weights = true;
// bool optimize_biases = true;
//
//// std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
// std::vector<double> init_guess(3 * n_inner_neurons);
//
// std::random_device seeder;
// std::mt19937 gen(seeder());
// std::uniform_real_distribution<double> dist(-10.0, 10.0);
// for(unsigned int i = 0; i < init_guess.size(); ++i){
// init_guess[i] = dist(gen);
// }
// if(!optimize_biases){
// for(unsigned int i = 0; i < n_inner_neurons; ++i){
// init_guess[3 * i + 2] = 0.0;
// }
// }
// if(!optimize_weights){
// for(unsigned int i = 0; i < n_inner_neurons; ++i){
// init_guess[3 * i] = 0.0;
// init_guess[3 * i + 1] = 0.0;
// }
// }
//
// test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, optimize_weights, optimize_biases, ds, de, test_size, ts, te);
bool optimize_weights = true;
bool optimize_biases = true;
// std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
std::vector<double> init_guess(3 * n_inner_neurons);
std::random_device seeder;
std::mt19937 gen(seeder());
std::uniform_real_distribution<double> dist(-1.0, 1.0);
for(unsigned int i = 0; i < init_guess.size(); ++i){
init_guess[i] = dist(gen);
}
test_analytical_gradient_y(init_guess, accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te);
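A note on the layout assumed here, inferred from the 3 * i, 3 * i + 1, 3 * i + 2 indexing in calculate_gradient: init_guess packs one (w_i, a_i, b_i) triplet per inner neuron, each entry drawn uniformly from [-1, 1]. A hypothetical accessor (not in the repository) illustrating that convention:

#include <cstddef>
#include <vector>

/* Hypothetical helper: unpacks the i-th neuron's (w, a, b) triplet from the
   flat parameter vector used throughout these examples. */
struct NeuronParams { double w, a, b; };

static NeuronParams neuron_params(const std::vector<double> &p, std::size_t i) {
    return { p[3 * i], p[3 * i + 1], p[3 * i + 2] };
}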
return 0;
......