Commit 2bbf9a37 authored by Martin Beseda's avatar Martin Beseda

MERGE: Merged

parents fa5f7738 a3306758
@@ -178,11 +178,14 @@ void Particle::print_coordinate() {
printf("%10.8f\n", (*this->coordinate)[this->coordinate_dim - 1]);
}
ParticleSwarm::ParticleSwarm(ErrorFunction* ef, double *domain_bounds,
ParticleSwarm::ParticleSwarm(ErrorFunction* ef, std::vector<double> *domain_bounds,
double c1, double c2, double w, size_t n_particles, size_t iter_max) {
srand(time(NULL));
if(domain_bounds->size() < 2 * ef->get_dimension()){
std::cerr << "The supplied domain bounds dimension is too low! It should be at least " << 2 * ef->get_dimension() << "\n" << std::endl;
}
this->f = ef;
this->func_dim = ef->get_dimension();
@@ -200,14 +203,12 @@ ParticleSwarm::ParticleSwarm(ErrorFunction* ef, double *domain_bounds,
this->iter_max = iter_max;
this->particle_swarm = new Particle*[this->n_particles];
this->domain_bounds = &(domain_bounds->at(0));
for( size_t pi = 0; pi < this->n_particles; ++pi ){
this->particle_swarm[pi] = new Particle(ef, domain_bounds);
this->particle_swarm[pi] = new Particle(ef, this->domain_bounds);
}
this->domain_bounds = domain_bounds;
}
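/* Illustrative sketch (not part of this commit): with the new interface the caller
 * supplies the bounds as a std::vector<double> of 2 * dimension entries, laid out
 * as (lower, upper) pairs per coordinate, e.g. for a two-dimensional error function
 * (the name mse below is a placeholder):
 *
 *   std::vector<double> bounds = {-10.0, 10.0, -10.0, 10.0};
 *   ParticleSwarm swarm(&mse, &bounds); // c1, c2, w, n_particles, iter_max take defaults
 */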
ParticleSwarm::~ParticleSwarm() {
@@ -337,7 +338,7 @@ void ParticleSwarm::optimize( double gamma, double epsilon, double delta) {
// }
/* Check if the particles are near to each other AND the maximal velocity is less than 'gamma' */
if(cluster.size() > delta * this->n_particles && std::abs(prev_max_vel_step/max_vel_step) > gamma) {
if( cluster.size() > delta * this->n_particles && prev_max_vel_step < gamma * max_vel_step ) {
break;
}
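/* A minimal sketch of the stopping rule above, factored into a predicate for
 * readability (illustrative only, not part of this commit):
 *
 *   bool swarm_converged = cluster.size() > delta * this->n_particles
 *                          && prev_max_vel_step < gamma * max_vel_step;
 *
 * i.e. a delta-fraction of the particles has gathered around the centroid AND the
 * previous maximal velocity step has fallen below gamma times the current one. */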
@@ -348,21 +349,18 @@ void ParticleSwarm::optimize( double gamma, double epsilon, double delta) {
this->determine_optimal_coordinate_and_value(*this->p_min_glob, optimal_value);
if(outer_it < this->iter_max) {
/* Convergence reached */
printf("\nFound optimum in %d iterations: %10.8f at coordinates: \n", (int)outer_it, optimal_value);
for (size_t i = 0; i <= this->func_dim - 1; ++i) {
printf("%10.8f \n", (*this->p_min_glob)[i]);
}
printf("\nFound optimum in %d iterations. Objective function value: %10.8f\n", (int)outer_it, optimal_value);
} else {
/* Maximal number of iterations reached */
printf("\nMax number of iterations reached (%d)! Found value %10.8f at coordinates: \n", (int)outer_it, optimal_value);
for (size_t i = 0; i <= this->func_dim - 1; ++i) {
printf("\t%10.8f \n", (*this->p_min_glob)[i]);
}
printf("\nMax number of iterations reached (%d)! Objective function value: %10.8f\n", (int)outer_it, optimal_value);
}
// for (size_t i = 0; i <= this->func_dim - 1; ++i) {
// printf("%10.8f \n", (*this->p_min_glob)[i]);
// }
//
// this->f->eval( this->get_solution() );
//delete [] p_min_glob; // TODO
delete centroid;
}
@@ -170,7 +170,8 @@ public:
* @param n_particles
* @param iter_max
*/
ParticleSwarm( ErrorFunction* ef, double* domain_bounds, double c1, double c2, double w, size_t n_particles, size_t iter_max = 1 );
//TODO make domain_bounds constant
ParticleSwarm( ErrorFunction* ef, std::vector<double> *domain_bounds, double c1 = 1.711897, double c2 = 1.711897, double w = 0.711897, size_t n_particles = 50, size_t iter_max = 1000 );
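/* With these defaults the minimal call used in the updated examples,
 *   ParticleSwarm swarm(&mse, &domain_bounds);
 * is equivalent to passing c1 = c2 = 1.711897, w = 0.711897, 50 particles and
 * at most 1000 iterations explicitly. */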
/**
*
@@ -319,7 +319,7 @@ void DESolver::set_error_function(size_t equation_idx, ErrorFunctionType F, Data
}
//TODO instead use general method with Optimizer as its argument (create hierarchy of optimizers)
void DESolver::solve_via_particle_swarm(double *domain_bounds, double c1, double c2, double w,
void DESolver::solve_via_particle_swarm(std::vector<double> *domain_bounds, double c1, double c2, double w,
size_t n_particles, size_t max_iters, double gamma,
double epsilon, double delta) {
@@ -135,7 +135,7 @@ public:
void solve_via_particle_swarm(
double * domain_bounds,
std::vector<double> *domain_bounds,
double c1,
double c2,
double w,
/**
* Basic example using the particle swarm method to train the network
* (result 0, -1/4)
*/
//
@@ -13,6 +12,13 @@
int main() {
std::cout << "Running lib4neuro example 1: Basic use of the particle swarm method to train a simple network with few linear neurons" << std::endl;
std::cout << "***********************************************************************************************************************" <<std::endl;
std::cout << "The code attempts to find an approximate solution to the system of equations below:" << std::endl;
std::cout << "0 * w1 + 1 * w2 = 0.50" << std::endl;
std::cout << "1 * w1 + 0.5*w2 = 0.75" << std::endl;
std::cout << "***********************************************************************************************************************" <<std::endl;
/* TRAIN DATA DEFINITION */
std::vector<std::pair<std::vector<double>, std::vector<double>>> data_vec;
std::vector<double> inp, out;
@@ -35,20 +41,15 @@ int main() {
NeuronLinear *i2 = new NeuronLinear( ); //f(x) = x
/* Output neuron */
double b = 1.0;//bias
NeuronLinear *o1 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *o1 = new NeuronLinear( ); //f(x) = x
/* Adding neurons to the net */
size_t idx1 = net.add_neuron(i1, BIAS_TYPE::NO_BIAS);
size_t idx2 = net.add_neuron(i2, BIAS_TYPE::NO_BIAS);
size_t idx3 = net.add_neuron(o1, BIAS_TYPE::NEXT_BIAS);
std::vector<double> *bv = net.get_parameter_ptr_biases();
for(size_t i = 0; i < 1; ++i){
bv->at(i) = 1.0;
}
size_t idx3 = net.add_neuron(o1, BIAS_TYPE::NO_BIAS);
//
/* Adding connections */
net.add_connection_simple(idx1, idx3, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT);
@@ -70,18 +71,39 @@ int main() {
MSE mse(&net, &ds);
/* TRAINING METHOD SETUP */
unsigned int max_iters = 2000;
double domain_bounds[4] = {-800.0, 800.0, -800.0, 800.0};
double c1 = 0.5, c2 = 1.5, w = 0.8;
std::vector<double> domain_bounds = {-10.0, 10.0, -10.0, 10.0, -10.0, 10.0};
ParticleSwarm swarm_01(&mse, &domain_bounds);
/* if the maximal velocity from the previous step is less than 'gamma' times the current maximal velocity, then one
* terminating criterion is met */
double gamma = 0.5;
/* if 'delta' times 'n' particles are in the centroid neighborhood given by the radius 'epsilon', then the second
* terminating criterion is met ('n' is the total number of particles) */
double epsilon = 0.02;
double delta = 0.7;
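/* Worked check of the second criterion (values taken from this example and the
 * constructor defaults): with n_particles = 50 and delta = 0.7, at least 35
 * particles must lie within radius epsilon = 0.02 of the swarm centroid. */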
swarm_01.optimize(gamma, epsilon, delta);
std::vector<double> *parameters = swarm_01.get_solution();
net.copy_parameter_space(parameters);
printf("w1 = %10.7f\n", parameters->at( 0 ));
printf("w2 = %10.7f\n", parameters->at( 1 ));
std::cout << "***********************************************************************************************************************" <<std::endl;
/* ERROR CALCULATION */
double error = 0.0;
inp = {0, 1};
net.eval_single( inp, out );
error += (0.5 - out[0]) * (0.5 - out[0]);
std::cout << "x = (0, 1), expected output: 0.50, real output: " << out[0] << std::endl;
unsigned int n_particles = 10;
inp = {1, 0.5};
net.eval_single( inp, out );
error += (0.75 - out[0]) * (0.75 - out[0]);
std::cout << "x = (1, 0.5), expected output: 0.75, real output: " << out[0] << std::endl;
std::cout << "Run finished! Error of the network: " << 0.5 * error << std::endl;
ParticleSwarm swarm_01(&mse, domain_bounds, c1, c2, w, n_particles, max_iters);
swarm_01.optimize(0.5, 0.02);
return 0;
}
/**
* Example of a neural network with reused edge weights
* The system of equations associated with the net in this example is not regular;
* the training minimizes the function: ((2y+0.5)^2 + (2x+1)^2 + (2x + y + 0.25)^2 + (2x+1)^2 + 1 + (4.5x + 0.37)^2 ) / 3,
* whose minimum value 0.705493164 is attained at (x, y) = (-1133/6290, -11193/62900) = (-0.180127186, -0.177949126)
*/
//
@@ -14,6 +11,13 @@
#include "../include/4neuro.h"
int main() {
std::cout << "Running lib4neuro example 2: Basic use of the particle swarm method to train a network with five linear neurons and repeating edge weights" << std::endl;
std::cout << "********************************************************************************************************************************************" <<std::endl;
std::cout << "The code attempts to find an approximate solution to the system of equations below:" << std::endl;
std::cout << " 0 * w1 + 1 * w2 = 0.50 + b1" << std::endl;
std::cout << " 1 * w1 + 0.5*w2 = 0.75 + b1" << std::endl;
std::cout << "(1.25 + b2) * w2 = 0.63 + b3" << std::endl;
std::cout << "***********************************************************************************************************************" <<std::endl;
/* TRAIN DATA DEFINITION */
std::vector<std::pair<std::vector<double>, std::vector<double>>> data_vec;
@@ -40,11 +44,11 @@ int main() {
NeuronLinear *i2 = new NeuronLinear( ); //f(x) = x
double b = 1;//bias
NeuronLinear *i3 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *i3 = new NeuronLinear( ); //f(x) = x
/* Output neurons */
NeuronLinear *o1 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *o2 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *o1 = new NeuronLinear( ); //f(x) = x
NeuronLinear *o2 = new NeuronLinear( ); //f(x) = x
@@ -55,14 +59,7 @@ int main() {
size_t idx4 = net.add_neuron(i3, BIAS_TYPE::NEXT_BIAS);
size_t idx5 = net.add_neuron(o2, BIAS_TYPE::NEXT_BIAS);
std::vector<double> *bv = net.get_parameter_ptr_biases();
for(size_t i = 0; i < 3; ++i){
bv->at(i) = 1.0;
}
/* Adding connections */
//net.add_connection_simple(idx1, idx3, -1, 1.0);
//net.add_connection_simple(idx2, idx3, -1, 1.0);
net.add_connection_simple(idx1, idx3, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT); // weight index 0
net.add_connection_simple(idx2, idx3, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT); // weight index 1
net.add_connection_simple(idx4, idx5, SIMPLE_CONNECTION_TYPE::EXISTING_WEIGHT, 0); // AGAIN weight index 0 - same weight!
@@ -94,21 +91,50 @@ int main() {
// printf("evaluation of error at point (%f, %f) => %f\n", weights[0], weights[1], mse.eval(weights));
/* TRAINING METHOD SETUP */
unsigned int max_iters = 5000;
//must encapsulate each of the partial error functions
double domain_bounds[4] = {-800.0, 800.0, -800.0, 800.0};
std::vector<double> domain_bounds = {-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0};
ParticleSwarm swarm_01(&mse, &domain_bounds);
/* if the maximal velocity from the previous step is less than 'gamma' times the current maximal velocity, then one
* terminating criterion is met */
double gamma = 0.5;
/* if 'delta' times 'n' particles are in the centroid neighborhood given by the radius 'epsilon', then the second
* terminating criterion is met ('n' is the total number of particles) */
double epsilon = 0.02;
double delta = 0.9;
swarm_01.optimize(gamma, epsilon, delta);
std::vector<double> *parameters = swarm_01.get_solution();
net.copy_parameter_space(parameters);
printf("w1 = %10.7f\n", parameters->at( 0 ));
printf("w2 = %10.7f\n", parameters->at( 1 ));
printf("b1 = %10.7f\n", parameters->at( 2 ));
printf("b2 = %10.7f\n", parameters->at( 3 ));
printf("b3 = %10.7f\n", parameters->at( 4 ));
std::cout << "***********************************************************************************************************************" <<std::endl;
/* ERROR CALCULATION */
double error = 0.0;
inp = {0, 1, 0};
net.eval_single( inp, out );
error += (0.5 - out[0]) * (0.5 - out[0]) + (0.0 - out[1]) * (0.0 - out[1]);
printf("x = (%4.2f, %4.2f, %4.2f), expected output: (%4.2f, %4.2f), real output: (%10.7f, %10.7f)\n", inp[0], inp[1], inp[2], 0.5, 0.0, out[0], out[1]);
double c1 = 0.5, c2 = 1.5, w = 0.8;
inp = {1, 0.5, 0};
net.eval_single( inp, out );
error += (0.75 - out[0]) * (0.75 - out[0]) + (0.0 - out[1]) * (0.0 - out[1]);
printf("x = (%4.2f, %4.2f, %4.2f), expected output: (%4.2f, %4.2f), real output: (%10.7f, %10.7f)\n", inp[0], inp[1], inp[2], 0.75, 0.0, out[0], out[1]);
unsigned int n_particles = 100;
inp = {0, 0, 1.25};
net.eval_single( inp, out );
error += (0.0 - out[0]) * (0.0 - out[0]) + (0.63 - out[1]) * (0.63 - out[1]);
printf("x = (%4.2f, %4.2f, %4.2f), expected output: (%4.2f, %4.2f), real output: (%10.7f, %10.7f)\n", inp[0], inp[1], inp[2], 0.0, 0.63, out[0], out[1]);
ParticleSwarm swarm_01(&mse, domain_bounds, c1, c2, w, n_particles, max_iters);
std::cout << "Run finished! Error of the network: " << error / 3.0 << std::endl;
swarm_01.optimize(0.5, 0.02, 0.9);
printf("evaluation of error: %f\n", mse.eval());
return 0;
}
\ No newline at end of file
@@ -14,6 +14,8 @@
#include "../include/4neuro.h"
int main() {
std::cout << "Running lib4neuro example 3: Use of the particle swarm method to train a set of networks sharing some edge weights" << std::endl;
std::cout << "********************************************************************************************************************" <<std::endl;
/* TRAIN DATA DEFINITION */
std::vector<std::pair<std::vector<double>, std::vector<double>>> data_vec_01, data_vec_02;
@@ -42,11 +44,11 @@ int main() {
NeuronLinear *i1 = new NeuronLinear(); //f(x) = x
NeuronLinear *i2 = new NeuronLinear(); //f(x) = x
NeuronLinear *i3 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *i3 = new NeuronLinear( ); //f(x) = x
/* Output neurons */
NeuronLinear *o1 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *o2 = new NeuronLinear( ); //f(x) = x + 1
NeuronLinear *o1 = new NeuronLinear( ); //f(x) = x
NeuronLinear *o2 = new NeuronLinear( ); //f(x) = x
@@ -57,11 +59,6 @@ int main() {
size_t idx4 = net.add_neuron(i3, BIAS_TYPE::NEXT_BIAS);
size_t idx5 = net.add_neuron(o2, BIAS_TYPE::NEXT_BIAS);
std::vector<double> *bv = net.get_parameter_ptr_biases();
for(size_t i = 0; i < 3; ++i){
bv->at(i) = 1.0;
}
/* Adding connections */
net.add_connection_simple(idx1, idx3, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT); // weight index 0
net.add_connection_simple(idx2, idx3, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT); // weight index 1
@@ -91,40 +88,50 @@ int main() {
subnet_01_input_neurons.push_back(idx1);
subnet_01_input_neurons.push_back(idx2);
subnet_01_output_neurons.push_back(idx3);
NeuralNetwork *subnet_01 = net.get_subnet(subnet_01_input_neurons, subnet_01_output_neurons);
NeuralNetwork *subnet_01 = net.get_subnet( subnet_01_input_neurons, subnet_01_output_neurons );
subnet_02_input_neurons.push_back(idx4);
subnet_02_output_neurons.push_back(idx5);
NeuralNetwork *subnet_02 = net.get_subnet(subnet_02_input_neurons, subnet_02_output_neurons);
NeuralNetwork *subnet_02 = net.get_subnet( subnet_02_input_neurons, subnet_02_output_neurons );
/* COMPLEX ERROR FUNCTION SPECIFICATION */
MSE mse_01(subnet_01, &ds_01);
MSE mse_02(subnet_02, &ds_02);
if(subnet_01 && subnet_02){
/* COMPLEX ERROR FUNCTION SPECIFICATION */
MSE mse_01(subnet_01, &ds_01);
MSE mse_02(subnet_02, &ds_02);
ErrorSum mse_sum;
mse_sum.add_error_function( &mse_01 );
mse_sum.add_error_function( &mse_02 );
ErrorSum mse_sum;
mse_sum.add_error_function( &mse_01 );
mse_sum.add_error_function( &mse_02 );
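/* The two MSE objectives are aggregated so that the shared parameters (e.g. the
 * reused weight index 0) are trained against both data sets at once: the swarm
 * below minimizes the combined error of mse_01 and mse_02 over a single
 * parameter vector. */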
/* TRAINING METHOD SETUP */
unsigned int max_iters = 50;
/* TRAINING METHOD SETUP */
std::vector<double> domain_bounds = {-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0,-10.0, 10.0, -10.0, 10.0};
ParticleSwarm swarm_01(&mse_sum, &domain_bounds);
/* if the maximal velocity from the previous step is less than 'gamma' times the current maximal velocity, then one
* terminating criterion is met */
double gamma = 0.5;
//must encapsulate each of the partial error functions
double domain_bounds[4] = {-800.0, 800.0, -800.0, 800.0};
/* if 'delta' times 'n' particles are in the centroid neighborhood given by the radius 'epsilon', then the second
* terminating criterion is met ('n' is the total number of particles) */
double epsilon = 0.02;
double delta = 0.9;
swarm_01.optimize(gamma, epsilon, delta);
double c1 = 0.5, c2 = 1.5, w = 0.8;
unsigned int n_particles = 100;
// printf("mse2: %d\n", mse_02.get_dimension());
ParticleSwarm swarm_01(&mse_sum, domain_bounds, c1, c2, w, n_particles, max_iters);
}
else{
std::cout << "We apologize, this example is unfinished as we are in the process of developing methods for efficient subnetwork definition" << std::endl;
}
swarm_01.optimize(0.5, 0.02, 0.9);
if(subnet_01){
delete subnet_01;
subnet_01 = nullptr;
}
// double weights[2] = {0, -0.25};
// printf("evaluation of error at (x, y) = (%f, %f): %f\n", weights[0], weights[1], mse_01.eval(weights));
if(subnet_02){
delete subnet_02;
subnet_02 = nullptr;
}
delete subnet_02;
delete subnet_01;
return 0;
}
\ No newline at end of file
@@ -232,9 +232,95 @@ double eval_error_function(std::vector<double> &parameters, size_t n_inner_neuro
return output;
}
void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, bool opti_w, bool opti_b, double d1_s, double d1_e,
void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
if(val > prev_val){
/* the error increased: shrink the step slightly */
gamma *= 0.99999;
}
if(sk <= 1e-3 || grad_norm < grad_norm_prev){
/* consecutive gradients are nearly collinear (movement along a line) or the
 * new slope is less steep: speed up */
gamma *= 1.0005;
}
else if(grad_norm > grad_norm_prev){
/* the new slope is steeper: slow down */
gamma /= 1.0005;
}
else{
/* otherwise be conservative and slow down more strongly */
gamma /= 1.005;
}
// gamma *= 0.999999;
}
void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ){
if( fi > fim )
{
c /= 1.0000005;
}
else if( fi < fim )
{
c *= 1.0000005;
}
gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
}
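/* The update above amounts to
 *   gamma_new = gamma * c^(1 - 2*beta) * (grad_norm_prev / grad_norm)^(1/c),
 * where c is nudged down whenever the error grew (fi > fim) and up whenever it
 * shrank, so the step length reacts to both the error trend and the gradient trend. */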
double calculate_gradient( std::vector<double> &data_points, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
size_t i, j;
double x, mem, derror, total_error, approx;
size_t train_size = data_points.size();
/* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
x = 0.0;
mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters));
derror = 2.0 * mem;
total_error = mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * eval_approx_dw_f(x, i, *parameters);
(*gradient)[3 * i + 1] -= derror * eval_approx_da_f(x, i, *parameters);
(*gradient)[3 * i + 2] -= derror * eval_approx_db_f(x, i, *parameters);
}
/* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters));
derror = 2.0 * mem;
total_error += mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * eval_approx_dw_df(x, i, *parameters);
(*gradient)[3 * i + 1] -= derror * eval_approx_da_df(x, i, *parameters);
(*gradient)[3 * i + 2] -= derror * eval_approx_db_df(x, i, *parameters);
}
for(j = 0; j < data_points.size(); ++j){
x = data_points[j];
/* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
approx= eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters);
mem = 0.0 - approx;
derror = 2.0 * mem / train_size;
for(i = 0; i < n_inner_neurons; ++i){
(*gradient)[3 * i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters));
(*gradient)[3 * i + 1] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters));
(*gradient)[3 * i + 2] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters));
}
total_error += mem * mem / train_size;
}
return total_error;
}
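/* Minimal usage sketch (illustrative, not part of this commit; assumes data_points
 * and n_inner_neurons are set up as in the surrounding code): one plain
 * gradient-descent step. calculate_gradient() accumulates the gradient of the
 * total error into its last argument and returns the error value itself, so the
 * accumulator must start at zero.
 *
 *   std::vector<double> params(3 * n_inner_neurons, 0.0);
 *   std::vector<double> grad(params.size(), 0.0); // zero-initialized accumulator
 *   double err = calculate_gradient(data_points, n_inner_neurons, &params, &grad);
 *   double step = 1e-4; // assumed fixed step length for the sketch
 *   for (size_t k = 0; k < params.size(); ++k) {
 *       params[k] -= step * grad[k]; // grad holds d(error)/d(param)
 *   }
 */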
void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, size_t n_inner_neurons, size_t train_size, double d1_s, double d1_e,
size_t test_size, double ts, double te) {
std::cout << "Finding a solution via a Gradient Descent method with adaptive step-length" << std::endl;
std::cout << "********************************************************************************************************************************************" <<std::endl;
/* SETUP OF THE TRAINING DATA */
std::vector<double> inp, out;
@@ -279,180 +365,71 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
for (i = 0; i < n_inner_neurons; ++i) {
wi = (*params_current)[3 * i];
ai = (*params_current)[3 * i + 1];
bi = (*params_current)[3 * i + 2];
printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", (int)(i + 1), wi, bi, ai);
}
// for (i = 0; i < n_inner_neurons; ++i) {
// wi = (*params_current)[3 * i];
// ai = (*params_current)[3 * i + 1];
// bi = (*params_current)[3 * i + 2];
//
// printf("Path %3d. w = %15.8f, b = %15.8f, a = %15.8f\n", (int)(i + 1), wi, bi, ai);
// }
gamma = 1.0;
double prev_val, val = 0.0, c = 2.0;
while( grad_norm > accuracy) {
iter_idx++;
prev_val = val;
grad_norm_prev = grad_norm;
/* current gradient */
/* reset of the current gradient */
std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
val = calculate_gradient( data_points, n_inner_neurons, params_current, gradient_current );
/* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
xj = 0.0;
mem = (1.0 - eval_approx_f(xj, n_inner_neurons, *params_current));
derror = 2.0 * mem;
total_error = mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= derror * eval_approx_dw_f(xj, i, *params_current);
(*gradient_current)[3 * i + 1] -= derror * eval_approx_da_f(xj, i, *params_current);
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= derror * eval_approx_db_f(xj, i, *params_current);
}
grad_norm = 0.0;
for(auto v: *gradient_current){
grad_norm += v * v;
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
grad_norm = std::sqrt(grad_norm);
/* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
mem = (1.0 - eval_approx_df(xj, n_inner_neurons, *params_current));
derror = 2.0 * mem;
total_error += mem * mem;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= derror * eval_approx_dw_df(xj, i, *params_current);
(*gradient_current)[3 * i + 1] -= derror * eval_approx_da_df(xj, i, *params_current);
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= derror * eval_approx_db_df(xj, i, *params_current);
}
}
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
for(j = 0; j < data_points.size(); ++j){
xj = data_points[j];
// xj = ds.get_data()->at(j).first[0];
/* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
approx= eval_approx_ddf(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current);
mem = 0.0 - approx;
error = 2.0 * mem / train_size;
for(i = 0; i < n_inner_neurons; ++i){
if(opti_w){
(*gradient_current)[3 * i] -= error * (eval_approx_dw_ddf(xj, i, *params_current) + 4.0 * eval_approx_dw_df(xj, i, *params_current) + 4.0 * eval_approx_dw_f(xj, i, *params_current));
(*gradient_current)[3 * i + 1] -= error * (eval_approx_da_ddf(xj, i, *params_current) + 4.0 * eval_approx_da_df(xj, i, *params_current) + 4.0 * eval_approx_da_f(xj, i, *params_current));
}
if(opti_b){
(*gradient_current)[3 * i + 2] -= error * (eval_approx_db_ddf(xj, i, *params_current) + 4.0 * eval_approx_db_df(xj, i, *params_current) + 4.0 * eval_approx_db_f(xj, i, *params_current));
}
}
total_error += mem * mem / train_size;
/* Update of the parameters */
/* step length calculation */
if(iter_idx < 10 || iter_idx % 100 == 0){
/* fixed step length */
gamma = 0.1 * accuracy;
}
else{
// for(auto e: *gradient_current){
// printf("[%10.8f]", e);
// }
// printf("\n");
/* conjugate direction coefficient (Polak-Ribiere): <-grad_curr, -grad_curr + grad_prev> / <-grad_prev, -grad_prev> */
/* Update of the parameters */
/* norm of the gradient calculation */
if(iter_idx < 10 || iter_idx % 1 == 0){
for(i = 0; i < conjugate_direction_current->size(); ++i){
(*conjugate_direction_current)[i] = - (*gradient_current)[i];
sk = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx = (*gradient_current)[i] - (*gradient_prev)[i];
sk += sx * sx;
}
}
else{
/* conjugate gradient */
sk = sy = 0.0;
for(i = 0; i < conjugate_direction_current->size(); ++i){
sk += (*gradient_current)[i] * ((*gradient_current)[i] - (*gradient_prev)[i]);
sy += (*gradient_prev)[i] * (*gradient_prev)[i];
}
beta = std::max(0.0, sk / sy);
sk = std::sqrt(sk);
/* update of the conjugate direction */
for(i = 0; i < conjugate_direction_current->size(); ++i){
(*conjugate_direction_current)[i] = beta * (*conjugate_direction_prev)[i] - (*gradient_current)[i];
/* angle between two consecutive gradients */
double beta = 0.0, sx = 0.0;
for(i = 0; i < gradient_current->size(); ++i){
sx += (gradient_current->at( i ) * gradient_prev->at( i ));
}
}
/* step length calculation */
if(iter_idx < 10){
/* fixed step length */
gamma = 0.000001;
}
else{
// /* Barzilai-Borwein */
// sk = sy = 0.0;
//
// for(i = 0; i < gradient_current->size(); ++i){
// sx = (*params_current)[i] - (*params_prev)[i];
// sg = (*conjugate_direction_current)[i] - (*conjugate_direction_prev)[i];
//
// sk += sx * sg;
// sy += sg * sg;
// }
//
// gamma = -sk / sy;
sx /= grad_norm * grad_norm_prev;
beta = std::sqrt(std::acos( sx ) / PI);
// /* Line search */
//
// gamma = line_search(10.0, conjugate_direction, *params_current, *gradient_current, n_inner_neurons, ds_00);
// eval_step_size_simple( gamma, val, prev_val, sk, grad_norm, grad_norm_prev );
eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
}
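/* Here sx has been normalized to the cosine of the angle between the current and
 * previous gradients, so std::acos(sx) / PI lies in [0, 1]: beta is 0 when the
 * gradient direction is unchanged and tends to 1 as it reverses, and
 * eval_step_size_mk() feeds it into the c^(1 - 2*beta) factor to grow or damp
 * the step length accordingly. */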
// for(auto e: conjugate_direction){
// printf("[%10.8f]", e);