diff --git a/src/LearningMethods/LearningSequence.cpp b/src/LearningMethods/LearningSequence.cpp
index fbfa7f8014599a09ce033a624292d20aa3171f39..4192ecce3eb05952ce13934cd7de46a562c393a1 100644
--- a/src/LearningMethods/LearningSequence.cpp
+++ b/src/LearningMethods/LearningSequence.cpp
@@ -57,5 +57,6 @@ namespace lib4neuro {
            }
            COUT_DEBUG( "Cycle: " << cycle_idx << ", the lowest error: " << the_best_error << std::endl );
        }
+       ef.get_network_instance()->copy_parameter_space( this->best_parameters );
    }
}
\ No newline at end of file
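Note on the hunk above: `LearningSequence` now writes the best parameter vector found across all cycles back into the network once the sequence finishes, instead of leaving the network with whatever the last method produced. A minimal sketch of that keep-the-winner pattern, with hypothetical stand-ins (`Method`, `run_sequence`) for the lib4neuro interfaces:

```cpp
#include <functional>
#include <limits>
#include <vector>

// Hypothetical stand-in for a lib4neuro learning method: runs on the given
// parameters in place and reports the achieved error.
struct Method {
    std::function<double(std::vector<double>&)> run;
};

// Runs each method in turn, snapshots the best parameters seen so far and
// returns them at the end -- the same pattern the patch implements via
// copy_parameter_space( this->best_parameters ).
std::vector<double> run_sequence(std::vector<Method>& methods,
                                 std::vector<double> params) {
    double best_error = std::numeric_limits<double>::max();
    std::vector<double> best_params = params;
    for (auto& m : methods) {
        double err = m.run(params);   // the method mutates params in place
        if (err < best_error) {       // keep a copy of the best state
            best_error = err;
            best_params = params;
        }
    }
    return best_params;               // caller writes these back into the net
}
```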
diff --git a/src/LearningMethods/LevenbergMarquardt.cpp b/src/LearningMethods/LevenbergMarquardt.cpp
index 28e811bac7103c053893897c10470fb69b797da6..e411df5465bed9002b938281bf4830f80b555bec 100644
--- a/src/LearningMethods/LevenbergMarquardt.cpp
+++ b/src/LearningMethods/LevenbergMarquardt.cpp
@@ -32,41 +32,73 @@ struct lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl {
     std::vector<double> *optimal_parameters;
 
     /**
-     * Returning Jacobian matrix of the residual function using the central differences method, i.e.
-     * f'(x) = (f(x + delta) - f(x - delta)) / 2*delta
+     * Fills the Jacobian matrix of the residual function using the backpropagation algorithm
      *
-     * @param h Step size
-     * @return Jacobian matrix
+     * @param f Neural network whose parameters are differentiated
+     * @param J Jacobian matrix to be filled, one row per data point
+     * @param data Data set supplying the evaluation points
      */
-    arma::Mat<double>* get_Jacobian_matrix(lib4neuro::ErrorFunction& ef,
-                                           arma::Mat<double>* J,
-                                           double h=1e-3);
+    void get_Jacobian_matrix(lib4neuro::NeuralNetwork &f, arma::Mat<double> &J, DataSet &data);
+
+    /**
+     * Assembles the right-hand side of the resulting system of equations from the errors on @data and the approximating function @f
+     * @param f Neural network approximating the data
+     * @param rhs Right-hand-side vector to be filled
+     * @param data Data set supplying the residuals
+     */
+    void get_rhs_vector(lib4neuro::NeuralNetwork &f, arma::Col<double> &rhs, DataSet &data);
 };
 
-arma::Mat<double>* lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_Jacobian_matrix(
-        lib4neuro::ErrorFunction& ef,
-        arma::Mat<double>* J,
-        double h) {
+void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_Jacobian_matrix(
+        lib4neuro::NeuralNetwork &f, arma::Mat<double> &J, DataSet &data) {
+
+    size_t n_parameters = f.get_n_weights() + f.get_n_biases();
+    size_t n_data_points = data.get_n_elements();
 
-    size_t n_parameters = ef.get_dimension();
-    size_t n_data_points = ef.get_dataset()->get_n_elements();
-    std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = ef.get_dataset()->get_data();
     std::vector<double> grad(n_parameters);
-    std::vector<double> *parameters = ef.get_parameters();
-    std::vector<double> error_scaling(ef.get_network_instance()->get_n_outputs());
-    std::fill(error_scaling.begin(), error_scaling.end(), 1.0);
+    std::vector<double> error_scaling(f.get_n_outputs());
+    std::fill(error_scaling.begin(), error_scaling.end(), 1.0);
 
     for(size_t i = 0; i < n_data_points; i++) {
         std::fill(grad.begin(), grad.end(), 0.0);
-        ef.get_network_instance()->add_to_gradient_single(data->at(i).first, error_scaling, 1.0, grad);
+        f.add_to_gradient_single(data.get_data()->at(i).first, error_scaling, 1.0, grad);
 
         for(size_t j = 0; j < n_parameters; j++) {
-            J->at(i, j) = grad.at(j);
+            J.at(i, j) = grad[j];
         }
     }
+}
+
+void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_rhs_vector(
+        lib4neuro::NeuralNetwork &f, arma::Col<double> &rhs, DataSet &data) {
+
+    size_t n_parameters = f.get_n_weights() + f.get_n_biases();
+    size_t n_data_points = data.get_n_elements();
+    size_t dim_out = f.get_n_outputs();
+
+    std::vector<double> grad(n_parameters);
+    std::vector<double> error_scaling(f.get_n_outputs());
+
+    rhs.resize(n_parameters);
+    std::fill(rhs.begin(), rhs.end(), 0.0);
+
+    for(size_t i = 0; i < n_data_points; i++) {
+
+        std::fill(grad.begin(), grad.end(), 0.0);
+
+        f.eval_single(data.get_data()->at(i).first, error_scaling);
 
-    return J;
+        for( size_t oi = 0; oi < dim_out; ++oi ){
+            error_scaling[oi] = data.get_data()->at(i).second.at(oi) - error_scaling[oi];
+        }
+
+        f.add_to_gradient_single(data.get_data()->at(i).first, error_scaling, 1.0, grad);
+
+        for(size_t j = 0; j < n_parameters; j++) {
+            rhs[j] += grad[j];
+        }
+    }
 }
 
 namespace lib4neuro {
@@ -87,6 +118,7 @@ namespace lib4neuro {
         this->p_impl->lambda_increase = lambda_increase;
         this->p_impl->lambda_decrease = lambda_decrease;
         this->p_impl->maximum_niters = max_iters;
+        this->p_impl->optimal_parameters = new std::vector<double>();
     }
 
     void LevenbergMarquardt::optimize(lib4neuro::ErrorFunction& ef,
@@ -106,43 +138,49 @@ namespace lib4neuro {
 
         double current_err = ef.eval();
 
-        COUT_INFO("Finding a solution via a Levenberg-Marquardt method with adaptive step-length... Starting error: " << current_err << std::endl);
+        COUT_INFO("Finding a solution via the Levenberg-Marquardt method... Starting error: " << current_err << std::endl);
         if(ofs && ofs->is_open()) {
-            *ofs << "Finding a solution via a Levenberg-Marquardt method with adaptive step-length... Starting error: " << current_err << std::endl;
+            *ofs << "Finding a solution via the Levenberg-Marquardt method... Starting error: " << current_err << std::endl;
         }
 
         size_t n_parameters = ef.get_dimension();
         size_t n_data_points = ef.get_dataset()->get_n_elements();
-        std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
-        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
-        std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
-        std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
         std::vector<double> *params_current = ef.get_parameters();
         std::vector<double> *params_tmp = new std::vector<double>(n_parameters);
         arma::Mat<double> J(n_data_points, n_parameters);  // Jacobian matrix
         arma::Mat<double> H(n_data_points, n_parameters);  // Hessian matrix
         arma::Mat<double> H_new(n_data_points, n_parameters);
-        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = ef.get_dataset()->get_data();
 
         double lambda = this->p_impl->lambda_initial;  // Damping parameter
-        double prev_err;
+        double prev_err = 0, update_norm = 0, gradient_norm = 0, mem_double = 0;
 
         bool update_J = true;
         arma::Col<double> update;
+        arma::Col<double> rhs;
         std::vector<double> d_prep(n_data_points);
-        arma::Col<double>* d;
+        arma::Col<double> d;
+
+        double slowdown_coeff = 0.25;
 
         //-------------------//
         // Solver iterations //
         //-------------------//
         size_t iter_counter = 0;
         do {
+
             if(update_J) {
                 /* Get Jacobian matrix */
-                //TODO rework so that this does not depend on the particular error function used
-                this->p_impl->get_Jacobian_matrix(ef, &J);
+                this->p_impl->get_Jacobian_matrix(*ef.get_network_instance(), J, *ef.get_dataset());
+                this->p_impl->get_rhs_vector(*ef.get_network_instance(), rhs, *ef.get_dataset());
+
+                gradient_norm = 0;
+                for( size_t ci = 0; ci < n_parameters; ++ci ){
+                    mem_double = rhs[ci];
+                    mem_double *= mem_double;
+                    gradient_norm += mem_double;
+                }
+                gradient_norm = std::sqrt(gradient_norm) / J.n_rows;
 
                 /* Get approximation of Hessian (H ~ J'*J) */
                 H = J.t() * J;
@@ -152,22 +190,20 @@ namespace lib4neuro {
             }
 
             /* H_new = H + lambda*I */
-            H_new = H + lambda * arma::eye<arma::Mat<double>>(n_parameters, n_parameters);
+//            H_new = H + lambda * arma::diagmat( H );
+            H_new = H + lambda * arma::eye( n_parameters, n_parameters );
 
-            /* Compute the residual vector */
-            for(size_t i = 0; i < n_data_points; i++) {
-                //TODO rework so that this does not depend on the particular error function used
-                d_prep.at(i) = ef.calculate_single_residual(&data->at(i).first, &data->at(i).second);
-            }
-            d = new arma::Col<double> (d_prep);
 
             /* Compute the update vector */
-            update = -H_new.i()*(J.t()*(*d));
+            update = arma::solve(H_new, rhs);
 
             /* Compute the error after update of parameters */
+            update_norm = 0.0;
             for(size_t i = 0; i < n_parameters; i++) {
-                params_tmp->at(i) = update.at(i);
+                params_tmp->at(i) = params_current->at(i) + update.at(i);
+                update_norm += update.at(i) * update.at(i);
             }
+            update_norm = std::sqrt(update_norm);
             current_err = ef.eval(params_tmp);
 
             /* Check if the parameter update improved the function */
@@ -191,21 +227,27 @@ namespace lib4neuro {
                 prev_err = current_err;
                 update_J = true;
-//                COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << std::endl);
+//                COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: " << gradient_norm << ", Direction norm: " << update_norm << std::endl);
 
             }
             else {
                 /* If the error after parameters update is not lower, increase the damping factor lambda */
                 update_J = false;
                 lambda *= this->p_impl->lambda_increase;
             }
+            COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: " << gradient_norm << ", Direction norm: " << update_norm << "\r");
 
-            COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << std::endl);
-        }while(iter_counter++ < this->p_impl->maximum_niters);
+        }while(iter_counter++ < this->p_impl->maximum_niters && update_norm > this->p_impl->tolerance);
+        COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: " << gradient_norm << ", Direction norm: " << update_norm << std::endl);
 
         /* Store the optimized parameters */
-        this->p_impl->optimal_parameters = params_current;
+        *this->p_impl->optimal_parameters = *params_current;
+
+        ef.get_network_instance()->copy_parameter_space(this->p_impl->optimal_parameters);
+
+        delete params_tmp;
+
     }
 
     std::vector<double>* LevenbergMarquardt::get_parameters() {
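Two things change in the solver above: the Jacobian and right-hand side now come from backpropagation instead of central differences, and the update is obtained with `arma::solve` on the damped normal equations rather than through the explicit inverse `H_new.i()`. A minimal, self-contained sketch of one such step, assuming only Armadillo (the function name `lm_step` is illustrative, not part of lib4neuro):

```cpp
#include <armadillo>

// One damped Gauss-Newton (Levenberg-Marquardt) step: solve
//     (J'J + lambda*I) * update = rhs,    rhs = J' * r,
// where J is the (n_data x n_params) Jacobian and r the residual vector.
arma::vec lm_step(const arma::mat& J, const arma::vec& r, double lambda) {
    arma::mat H = J.t() * J;                             // Gauss-Newton Hessian approximation
    arma::mat H_damped = H + lambda * arma::eye(H.n_rows, H.n_cols);
    return arma::solve(H_damped, J.t() * r);             // no explicit inverse
}
```

Letting `arma::solve` pick a suitable factorization is both cheaper and numerically safer than forming `H_damped.i()`, especially when `lambda` is small and the system is ill-conditioned.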
diff --git a/src/LearningMethods/ParticleSwarm.cpp b/src/LearningMethods/ParticleSwarm.cpp
index 05c41dde55319cef9b1082381355f6b41732471d..276b518917fb8c777109273a68b4558725b3e801 100644
--- a/src/LearningMethods/ParticleSwarm.cpp
+++ b/src/LearningMethods/ParticleSwarm.cpp
@@ -36,9 +36,8 @@ void Particle::randomize_coordinates() {
 
     std::random_device seeder;
     std::mt19937 gen(seeder());
-    this->domain_bounds = domain_bounds;
     for(unsigned int i = 0; i < this->coordinate_dim; ++i){
-        std::uniform_real_distribution<double> dist_coord(-1, 1);
+        std::uniform_real_distribution<double> dist_coord(this->domain_bounds->at(2 * i), this->domain_bounds->at(2 * i + 1));
        (*this->coordinate)[i] = dist_coord(gen);
     }
 }
@@ -65,7 +64,42 @@ void Particle::randomize_velocity() {
 
 Particle::Particle(lib4neuro::ErrorFunction *ef, std::vector<double> *domain_bounds) {
     this->ef = ef;
-    this->domain_bounds = domain_bounds;
+    this->domain_bounds = new std::vector<double>(*domain_bounds);
+    this->coordinate_dim = ef->get_dimension();
+
+    this->coordinate = new std::vector<double>(this->coordinate_dim);
+    this->velocity = new std::vector<double>(this->coordinate_dim);
+    this->optimal_coordinate = new std::vector<double>(this->coordinate_dim);
+
+    this->randomize_velocity();
+    this->randomize_parameters();
+    this->randomize_coordinates();
+
+    for(unsigned int i = 0; i < this->coordinate_dim; ++i){
+        (*this->optimal_coordinate)[i] = (*this->coordinate)[i];
+    }
+
+    this->optimal_value = this->ef->eval(this->coordinate);
+
+}
+
+Particle::Particle(lib4neuro::ErrorFunction *ef, std::vector<double> *central_system, double dispersion_coeff) {
+
+    this->ef = ef;
+
+    if( this->domain_bounds ){
+        delete this->domain_bounds;
+    }
+
+    this->domain_bounds = new std::vector<double>(2 * central_system->size());
+
+    for( size_t i = 0; i < central_system->size(); ++i ){
+        this->domain_bounds->at(2 * i) = central_system->at(i) - dispersion_coeff;
+        this->domain_bounds->at(2 * i + 1) = central_system->at(i) + dispersion_coeff;
+    }
 
     this->coordinate_dim = ef->get_dimension();
     this->ef = ef;
@@ -228,6 +264,10 @@ namespace lib4neuro {
             this->particle_swarm = nullptr;
         }
 
+        if( this->domain_bounds ){
+            delete this->domain_bounds;
+        }
+
         if (this->p_min_glob) {
             delete this->p_min_glob;
             this->p_min_glob = nullptr;
@@ -257,13 +297,16 @@ namespace lib4neuro {
 
         this->func_dim = ef.get_dimension();
 
+
+
         /* initialize the particles */
         for (size_t pi = 0; pi < this->n_particles; ++pi) {
             if (this->particle_swarm->at(pi)) {
                 delete this->particle_swarm->at(pi);
             }
 
-            this->particle_swarm->at(pi) = new Particle(&ef, this->domain_bounds);
+            this->particle_swarm->at(pi) = new Particle(&ef, ef.get_parameters(), this->radius_factor);
         }
+        this->radius_factor *= 1.25;
 
         if (!this->p_min_glob) {
             this->p_min_glob = new std::vector<double>(this->func_dim);
@@ -503,7 +546,7 @@ namespace lib4neuro {
         this->n_particles = n_particles;
         this->iter_max = iter_max;
         this->particle_swarm = new std::vector<Particle *>(this->n_particles);
-        this->domain_bounds = domain_bounds;
+        this->domain_bounds = new std::vector<double>(*domain_bounds);
 
         std::fill(this->particle_swarm->begin(), this->particle_swarm->end(), nullptr);
     }
diff --git a/src/LearningMethods/ParticleSwarm.h b/src/LearningMethods/ParticleSwarm.h
index b6b8d9318debfc1a84d93f671836f728b53c5898..45ed15002dea8ff222d42cbc0b277bc62e6785db 100644
--- a/src/LearningMethods/ParticleSwarm.h
+++ b/src/LearningMethods/ParticleSwarm.h
@@ -33,9 +33,9 @@ private:
 
     double current_val;
 
-    lib4neuro::ErrorFunction *ef;
+    lib4neuro::ErrorFunction *ef = nullptr;
 
-    std::vector<double> *domain_bounds;
+    std::vector<double> *domain_bounds = nullptr;
 
     void randomize_coordinates();
@@ -56,6 +56,15 @@ public:
      * @param f_dim
      */
     LIB4NEURO_API Particle(lib4neuro::ErrorFunction* ef, std::vector<double> *domain_bounds);
+
+    /**
+     * Constructs a particle dispersed uniformly around @central_system within radius @dispersion_coeff
+     * @param ef
+     * @param central_system
+     * @param dispersion_coeff
+     */
+    LIB4NEURO_API Particle(lib4neuro::ErrorFunction* ef, std::vector<double> *central_system, double dispersion_coeff);
+
     LIB4NEURO_API ~Particle( );
 
     /**
@@ -166,6 +175,11 @@ namespace lib4neuro {
      */
     double delta;
 
+    /**
+     * Increases the range of the particle dispersion with each consecutive run of @this->optimize
+     */
+    double radius_factor = 1.0;
+
     /**
      * Error threshold - determines a successful convergence
     *
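The new `Particle` constructor declared above disperses particles around a central point instead of a fixed global box: coordinate `i` is drawn uniformly from `[central_system[i] - dispersion_coeff, central_system[i] + dispersion_coeff]`, and `radius_factor` widens that box by 1.25x on every `optimize` run. A minimal sketch of the sampling, under an illustrative name (`sample_around` is not the lib4neuro API):

```cpp
#include <cstddef>
#include <random>
#include <vector>

// Draws one point uniformly from the box centered at `center` with
// half-width `radius` in every coordinate -- the same bounds the new
// Particle constructor builds from @central_system and @dispersion_coeff.
std::vector<double> sample_around(const std::vector<double>& center, double radius) {
    std::random_device seeder;
    std::mt19937 gen(seeder());
    std::vector<double> point(center.size());
    for (std::size_t i = 0; i < center.size(); ++i) {
        std::uniform_real_distribution<double> dist(center[i] - radius, center[i] + radius);
        point[i] = dist(gen);
    }
    return point;
}
```

Centering each restart on the current best parameters (as `optimize` now does via `ef.get_parameters()`) keeps the swarm exploring around the incumbent solution, while the growing radius keeps consecutive cycles from collapsing onto the same basin.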
diff --git a/src/examples/simulator.cpp b/src/examples/simulator.cpp
index 1126529bb547fadaa6f05ec20a5d4fc24118d79d..451a1501d35cb6f195ed85eafef73d3715999920 100644
--- a/src/examples/simulator.cpp
+++ b/src/examples/simulator.cpp
@@ -17,12 +17,15 @@ int main(int argc, char** argv) {
 
     bool normalize_data = true;
     double prec = 1e-9;
+    double prec_lm = 1e-15;
     int restart_interval = 500;
     int max_n_iters_gradient = 10000;
-    int max_n_iters_swarm = 50;
-    int n_particles_swarm = 100;
+    int max_n_iters_gradient_lm = 10000;
+
+    int max_n_iters_swarm = 20;
+    int n_particles_swarm = 200;
     int batch_size = 0;
-    int max_number_of_cycles = 10;
+    int max_number_of_cycles = 5;
 
     try {
         /* PHASE 1 - TRAINING DATA LOADING, NETWORK ASSEMBLY AND PARTICLE SWARM OPTIMIZATION */
         l4n::CSVReader reader1("/home/fluffymoo/Dropbox/data_BACK_RH_1.csv", ";", true);  // File, separator, skip 1st line
@@ -39,7 +42,7 @@ int main(int argc, char** argv) {
         }
 
         /* Numbers of neurons in layers (including input and output layers) */
-        std::vector<unsigned int> neuron_numbers_in_layers = { 1, 2, 1 };
+        std::vector<unsigned int> neuron_numbers_in_layers = { 1, 6, 6, 1 };
 
         /* Fully connected feed-forward network with linear activation functions for input and output */
         /* layers and the specified activation fns for the hidden ones (each entry = layer)*/
@@ -75,18 +78,21 @@ int main(int argc, char** argv) {
                              0.7,
                              n_particles_swarm,
                              max_n_iters_swarm);
+
         // Parameters of the gradient descent
         // 1) Threshold for the successful ending of the optimization - deviation from minima
        // 2) Number of iterations to reset step size to tolerance/10.0
         // 3) Maximal number of iterations - optimization will stop after that, even if not converged
+        l4n::GradientDescent gs_(prec, restart_interval, max_n_iters_gradient, batch_size);
         l4n::GradientDescentBB gs(prec, restart_interval, max_n_iters_gradient, batch_size);
-        l4n::GradientDescentSingleItem gs_si(prec, 0, 5000);
-        l4n::LevenbergMarquardt leven(max_n_iters_gradient);
+        l4n::GradientDescentSingleItem gs_si(prec, 0, 5000);//TODO needs improvement
+        l4n::LevenbergMarquardt leven(max_n_iters_gradient_lm, prec_lm);
         l4n::LearningSequence learning_sequence( 1e-6, max_number_of_cycles );
 
         learning_sequence.add_learning_method( &ps );
 //        learning_sequence.add_learning_method( &gs );
         learning_sequence.add_learning_method( &leven );
+//        learning_sequence.add_learning_method( &gs_ );
 //        learning_sequence.add_learning_method( &gs_si );
 //        learning_sequence.add_learning_method( &gs );
 
@@ -137,7 +143,7 @@ int main(int argc, char** argv) {
 
 //        /* Error function */
         l4n::MSE mse3(&nn3, &ds3);  // First parameter - neural network, second parameter - data-set
-        mse3.eval_on_data_set(&ds3, &output_file);
+        mse3.eval_on_data_set(&ds3, &output_file, nullptr, normalize_data, true);
 
         /* Close the output file for writing */
         output_file.close();
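For scale: the architecture change from `{ 1, 2, 1 }` to `{ 1, 6, 6, 1 }` grows the problem considerably, which is presumably why the particle count doubles and the Levenberg-Marquardt run gets its own iteration budget and tolerance. A back-of-the-envelope count, assuming a fully connected net with one bias per non-input neuron (lib4neuro's exact counting may differ):

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// Weights: sum of n_l * n_{l+1} over consecutive layers;
// biases: one per neuron outside the input layer (an assumption here).
int param_count(const std::vector<int>& layers) {
    int weights = 0, biases = 0;
    for (std::size_t i = 0; i + 1 < layers.size(); ++i) {
        weights += layers[i] * layers[i + 1];
        biases  += layers[i + 1];
    }
    return weights + biases;
}

int main() {
    std::printf("{1,2,1}:   %d parameters\n", param_count({1, 2, 1}));     // 4 weights + 3 biases  = 7
    std::printf("{1,6,6,1}: %d parameters\n", param_count({1, 6, 6, 1}));  // 48 weights + 13 biases = 61
}
```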