From d6969095ed6996629b2c69fe90c4766a789ffc31 Mon Sep 17 00:00:00 2001 From: Michal Kravcenko <michal.kravcenko@vsb.cz> Date: Wed, 20 Feb 2019 14:00:33 +0100 Subject: [PATCH] ADD+MOD: added two new learning methods and a new learning method able to cycle through user defined sequence of learning methods --- src/LearningMethods/GradientDescent.cpp | 129 +++++++++- src/LearningMethods/GradientDescent.h | 18 ++ src/LearningMethods/GradientDescentBB.cpp | 229 ++++++++++++++++++ src/LearningMethods/GradientDescentBB.h | 91 +++++++ .../GradientDescentSingleItem.cpp | 106 ++++++++ .../GradientDescentSingleItem.h | 106 ++++++++ src/LearningMethods/ILearningMethods.cpp | 3 +- src/LearningMethods/ILearningMethods.h | 4 +- src/LearningMethods/LearningSequence.cpp | 61 +++++ src/LearningMethods/LearningSequence.h | 80 ++++++ src/LearningMethods/ParticleSwarm.cpp | 8 +- 11 files changed, 817 insertions(+), 18 deletions(-) create mode 100644 src/LearningMethods/GradientDescentBB.cpp create mode 100644 src/LearningMethods/GradientDescentBB.h create mode 100644 src/LearningMethods/GradientDescentSingleItem.cpp create mode 100644 src/LearningMethods/GradientDescentSingleItem.h create mode 100644 src/LearningMethods/LearningSequence.cpp create mode 100644 src/LearningMethods/LearningSequence.h diff --git a/src/LearningMethods/GradientDescent.cpp b/src/LearningMethods/GradientDescent.cpp index ee8cec51..44412b60 100644 --- a/src/LearningMethods/GradientDescent.cpp +++ b/src/LearningMethods/GradientDescent.cpp @@ -5,6 +5,7 @@ * @date 30.7.18 - */ +#include <random.hpp> #include "GradientDescent.h" #include "message.h" @@ -42,6 +43,46 @@ namespace lib4neuro { } + bool GradientDescent::perform_feasible_1D_step( + lib4neuro::ErrorFunction &ef, + double error_previous, + double step_coefficient, + std::vector<double> *direction, + std::vector<double> *parameters_before, + std::vector<double> *parameters_after + ) { + + size_t i; + + boost::random::mt19937 gen(std::time(0)); + boost::random::uniform_int_distribution<> dis(0, direction->size()); + size_t max_dir_idx = dis(gen); + + double error_current = error_previous + 1.0; + while( error_current >= error_previous ){ + (*parameters_after)[max_dir_idx] = (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx]; + + error_current = ef.eval( parameters_after ); + if( step_coefficient < 1e-32){ +// COUT_DEBUG(" Attempting to find a feasible direction in one dimension was NOT SUCCESSFUL" << std::endl); + for (i = 0; i < direction->size(); ++i) { + (*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i]; + } + return false; + } + else{ + if( error_current >= error_previous ){ +// COUT_DEBUG(" Incorrect step size! Reducing it by a factor of 0.5. Errors: " << error_current << ", prev: " << error_previous << std::endl); + step_coefficient *= 0.5; + } + else{ +// COUT_DEBUG(" Step OK" << std::endl); + } + } + } + return true; + } + void GradientDescent::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) { /* Copy data set max and min values, if it's normalized */ @@ -51,9 +92,11 @@ namespace lib4neuro { } COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl); + COUT_INFO("Initial error: " << ef.eval() << std::endl); if(ofs && ofs->is_open()) { *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." 
<< std::endl; + *ofs << "Initial error: " << ef.eval() << std::endl; } double grad_norm = this->tolerance * 10.0, gamma, sx, beta; @@ -80,6 +123,12 @@ namespace lib4neuro { std::fill(gradient_current->begin(), gradient_current->end(), 0.0); std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0); + + val = ef.eval(params_current); + double coeff = 1; + bool it_analyzed = false; + size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0; + double cooling = 1.0; while (grad_norm > this->tolerance && (iter_idx != 0)) { iter_idx--; iter_counter++; @@ -105,10 +154,12 @@ namespace lib4neuro { /* Update of the parameters */ /* step length calculation */ - if (iter_counter < 10 || iter_counter % this->restart_frequency == 0) { + if (iter_counter < 10 || iter_counter % this->restart_frequency == 0 ) { /* fixed step length */ gamma = 0.1 * this->tolerance; - //gamma = 0.001; +// gamma = 1 / grad_norm; +// gamma = 1e-4; + cooling = 1.0; } else { /* angle between two consecutive gradients */ sx = 0.0; @@ -128,8 +179,54 @@ namespace lib4neuro { } for (i = 0; i < gradient_current->size(); ++i) { - (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i]; + (*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i]; } + val = ef.eval(params_prev); + +// val = prev_val + 1.0; +// coeff = 1; +// it_analyzed = false; +// while(val >= prev_val){ +// for (i = 0; i < gradient_current->size(); ++i) { +// (*params_prev)[i] = (*params_current)[i] - coeff * gamma * (*gradient_current)[i]; +// } +// val = ef.eval(params_prev); +// +// +// if( coeff < 1e-32){ +//// COUT_DEBUG("Error, the advised gradient direction is not feasible. Attempting to find a feasible direction in one dimension" << std::endl); +// if( !this->perform_feasible_1D_step(ef, prev_val, gamma, gradient_current, params_current, params_prev) ){ +// gamma = 1; +// counter_simplified_direction_bad++; +// } +// else{ +// gamma = 1; +// counter_simplified_direction_good++; +// } +// +// break; +// } +// else{ +// if( val >= prev_val ){ +//// COUT_DEBUG("Incorrect step size! Reducing gamma. Errors: " << val << ", prev: " << prev_val << std::endl); +// coeff *= 0.5; +// +// if( !it_analyzed ){ +// counter_bad_guesses++; +// } +// } +// else{ +//// COUT_DEBUG("Step OK" << std::endl); +// if( !it_analyzed ){ +// counter_good_guesses++; +// } +// } +// } +// it_analyzed = true; +// } +// gamma *= coeff; + + /* switcheroo */ ptr_mem = gradient_prev; @@ -140,22 +237,31 @@ namespace lib4neuro { params_prev = params_current; params_current = ptr_mem; - val = ef.eval(params_current); COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter) - << ". Step size: " << gamma + << ". Step size: " << gamma * cooling << ". C: " << c << ". Gradient norm: " << grad_norm << ". Total error: " << val << ".\r" ); WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter) - << ". Step size: " << gamma + << ". Step size: " << gamma * cooling << ". C: " << c << ". Gradient norm: " << grad_norm << ". Total error: " << val << "." << std::endl); +// if(iter_counter % 100 == 0){ +// COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter) +// << ". Step size: " << gamma +// << ". C: " << c +// << ". Gradient norm: " << grad_norm +// << ". Total error: " << val +// << ".\r"); +// } + + cooling *= 0.9999; } COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter) @@ -164,19 +270,20 @@ namespace lib4neuro { << ". 
Gradient norm: " << grad_norm << ". Total error: " << val << "." << std::endl); + COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses << ", good: " << counter_good_guesses << ", bad: " << counter_bad_guesses << ", from which " << counter_simplified_direction_good + counter_simplified_direction_bad << " were attempted by simplified direction, success: " << counter_simplified_direction_good << ", fail: " << counter_simplified_direction_bad << std::endl << std::endl ); if(iter_idx == 0) { - COUT_INFO("Maximum number of iterations (" << this->maximum_niters << ") was reached!" << std::endl); + COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl); if(ofs && ofs->is_open()) { - *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached!" << std::endl; + *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl; } } else { - COUT_INFO("Gradient Descent method converged after " - << this->maximum_niters-iter_idx - << " iterations." + COUT_INFO(std::endl << "Gradient Descent method converged after " + << this->maximum_niters - iter_idx + << " iterations. Final error:" << val << std::endl); #ifdef L4N_DEBUG if(ofs && ofs->is_open()) { diff --git a/src/LearningMethods/GradientDescent.h b/src/LearningMethods/GradientDescent.h index 7936937f..bd75a317 100644 --- a/src/LearningMethods/GradientDescent.h +++ b/src/LearningMethods/GradientDescent.h @@ -83,6 +83,24 @@ namespace lib4neuro { double fi, double fim); + /** + * Analyses direction of parameters change and performs the most feasible step in one parameter + * @param ef[in] error function to be optimized + * @param error_previous[in] evaluation of the error function on the @parameters_before state + * @param step_coefficient[in] scalar value denoting the scaling of the step in one direction + * @param direction direction[in] vector to be analyzed + * @param parameters_before[in] state of the parameter space before analysis + * @param parameters_after[out] suggested state of the parameters after the analysis completes + */ + virtual bool perform_feasible_1D_step( + lib4neuro::ErrorFunction &ef, + double error_previous, + double step_coefficient, + std::vector<double> * direction, + std::vector<double> *parameters_before, + std::vector<double> *parameters_after + ); + public: /** diff --git a/src/LearningMethods/GradientDescentBB.cpp b/src/LearningMethods/GradientDescentBB.cpp new file mode 100644 index 00000000..3f86452e --- /dev/null +++ b/src/LearningMethods/GradientDescentBB.cpp @@ -0,0 +1,229 @@ +/** + * DESCRIPTION OF THE FILE + * + * @author Michal KravÄŤenko + * @date 4.2.19 - + */ + +#include "GradientDescentBB.h" +#include "message.h" + +namespace lib4neuro { + GradientDescentBB::GradientDescentBB(double epsilon, size_t n_to_restart, int max_iters, size_t batch) { + this->tolerance = epsilon; + this->restart_frequency = n_to_restart; + this->optimal_parameters = new std::vector<double>(0); + this->maximum_niters = max_iters; + this->batch = batch; + } + + GradientDescentBB::~GradientDescentBB() { + if (this->optimal_parameters) { + delete this->optimal_parameters; + this->optimal_parameters = nullptr; + } + } + + + void GradientDescentBB::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) { + + /* Copy data set max and min values, if it's normalized */ + if(ef.get_dataset()->is_normalized()) { + 
ef.get_network_instance()->set_normalization_strategy_instance( + ef.get_dataset()->get_normalization_strategy()); + } + + COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl); + COUT_INFO("Initial error: " << ef.eval() << std::endl); + + if(ofs && ofs->is_open()) { + *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl; + *ofs << "Initial error: " << ef.eval() << std::endl; + } + + double grad_norm = this->tolerance * 10.0, gamma, sx, beta; + double grad_norm_prev; + size_t i; + long long int iter_idx = this->maximum_niters; + size_t iter_counter = 0; + + gamma = 1.0; + double prev_val, val = 0.0, c = 1.25, val_best; + + size_t n_parameters = ef.get_dimension(); + + + std::vector<double> *gradient_current = new std::vector<double>(n_parameters); + std::vector<double> *gradient_prev = new std::vector<double>(n_parameters); + std::vector<double> *params_current = ef.get_parameters(); + std::vector<double> *params_prev = new std::vector<double>(n_parameters); + std::vector<double> *params_best = new std::vector<double>(*params_current); + + std::vector<double> *ptr_mem; + + double alpha = -1.0, cc, gg; + std::vector<double> dot__( 3 ); + double d1 = 0.0, d2 = 0.0, d3 = 0.0; + + + std::fill(gradient_current->begin(), gradient_current->end(), 0.0); + std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0); + val = ef.eval(params_current); + val_best = val; + +// this-> batch = 0; + double cooling_factor = 1.0; + while (grad_norm > this->tolerance && (iter_idx != 0)) { + iter_idx--; + iter_counter++; + prev_val = val; + grad_norm_prev = grad_norm; + + /* reset of the current gradient */ + std::fill(gradient_current->begin(), gradient_current->end(), 0.0); +// std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0); + ef.calculate_error_gradient(*params_current, *gradient_current, 1.0, this->batch); +// double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current ); + +// for(size_t k = 0; k < gradient_mem.size(); ++k){ +// printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k )); +// } +// printf("---------------------\n"); + + grad_norm = 0.0; + for (auto v: *gradient_current) { + grad_norm += v * v; + //COUT_DEBUG( grad_norm << std::endl ); + } + grad_norm = std::sqrt(grad_norm); + + /* Update of the parameters */ + /* step length calculation */ + if (iter_counter < 10 || iter_counter % this->restart_frequency < 10 ) { + /* fixed step length */ + gamma = 0.1 * this->tolerance; +// gamma = 1 / grad_norm; +// gamma = 1e-4; + cooling_factor = 1.0; + } else { + + std::fill( dot__.begin( ), dot__.end( ), 0.0 ); + d1 = d2 = d3 = 0.0; + + for ( int d = 0; d < gradient_current->size(); d++ ) { + cc = params_current->at( d ) - params_prev->at( d ); + gg = gradient_current->at( d ) - gradient_prev->at( d ); + + d1 += cc * cc; + d2 += cc * gg; + d3 += gg * gg; + } + + dot__[0] = d1; + dot__[1] = d2; + dot__[2] = d3; + + gamma = 1; + if ( fabs( dot__[1] ) > 0.0 ) { + gamma = 0.25*( dot__[0] / dot__[1] ); + } + } + + for (i = 0; i < gradient_current->size(); ++i) { + (*params_prev)[i] = (*params_current)[i] - cooling_factor * gamma * (*gradient_current)[i]; + } + + + /* switcheroo */ + ptr_mem = gradient_prev; + gradient_prev = gradient_current; + gradient_current = ptr_mem; + + ptr_mem = params_prev; + params_prev = params_current; + params_current = ptr_mem; + + val = ef.eval(params_current); + if( val < val_best ){ + val_best 
= val; + + for(i = 0; i < gradient_current->size(); ++i){ + params_best->at( i ) = params_current->at( i ); + } + } + + COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter) + << ". Step size: " << gamma*cooling_factor + << ". C: " << c + << ". Gradient norm: " << grad_norm + << ". Total error: " << val << ". the lowest error: " << val_best + << ".\r" ); + + WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter) + << ". Step size: " << gamma*cooling_factor + << ". C: " << c + << ". Gradient norm: " << grad_norm + << ". Total error: " << val << ". the lowest error: " << val_best + << "." << std::endl); + +// if(iter_counter % 100 == 0){ +// COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter) +// << ". Step size: " << gamma +// << ". C: " << c +// << ". Gradient norm: " << grad_norm +// << ". Total error: " << val +// << ".\r"); +// } + + cooling_factor *= 0.99999; + + } + COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter) + << ". Step size: " << gamma*cooling_factor + << ". C: " << c + << ". Gradient norm: " << grad_norm + << ". Total error: " << val + << "." << std::endl); + + + if(iter_idx == 0) { + COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val_best << std::endl); + + if(ofs && ofs->is_open()) { + *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val_best << std::endl; + + } + + } else { + COUT_INFO(std::endl << "Gradient Descent method converged after " + << this->maximum_niters - iter_idx + << " iterations. Final error:" << val_best + << std::endl); +#ifdef L4N_DEBUG + if(ofs && ofs->is_open()) { + *ofs << "Gradient Descent method converged after " + << this->maximum_niters-iter_idx + << " iterations." 
+ << std::endl; + } +#endif + } + + *this->optimal_parameters = *params_best; + + +// ef.analyze_error_gradient(*params_current, *gradient_current, 1.0, this->batch); + ef.get_network_instance()->copy_parameter_space(this->optimal_parameters); + + delete gradient_current; + delete gradient_prev; + delete params_current; + delete params_prev; + delete params_best; + } + + std::vector<double> *GradientDescentBB::get_parameters() { + return this->optimal_parameters; + } + +} diff --git a/src/LearningMethods/GradientDescentBB.h b/src/LearningMethods/GradientDescentBB.h new file mode 100644 index 00000000..b93b469f --- /dev/null +++ b/src/LearningMethods/GradientDescentBB.h @@ -0,0 +1,91 @@ +/** + * DESCRIPTION OF THE FILE + * + * @author Michal KravÄŤenko + * @date 4.2.19 - + */ + +#ifndef LIB4NEURO_GRADIENTDESCENTBB_H +#define LIB4NEURO_GRADIENTDESCENTBB_H + + +#include "../settings.h" +#include "../constants.h" +#include "ILearningMethods.h" +#include "../ErrorFunction/ErrorFunctions.h" + +namespace lib4neuro { + /** + * + */ + class GradientDescentBB : public ILearningMethods { + + private: + + /** + * Threshold for the successful ending of the optimization - deviation from minima + */ + double tolerance; + + /** + * + */ + double max_error; + + /** + * Number of iterations to reset step size to tolerance/10.0 + */ + size_t restart_frequency; + + /** + * + */ + size_t batch; + + /** + * + */ + size_t iter_max; + + /** + * Maximal number of iterations - optimization will stop after that, even if not converged + */ + long long int maximum_niters; + + /** + * Vector of minima coordinates + */ + std::vector<double> *optimal_parameters; + + public: + + /** + * Creates an instance of Gradient Descent Optimizer (i.e. back-propagation) + * @param epsilon Threshold for the successful ending of the optimization - deviation from minima + * @param n_to_restart Number of iterations to reset step size to tolerance/10.0 + * @param max_iters Maximal number of iterations - optimization will stop after that, even if not converged + */ + LIB4NEURO_API explicit GradientDescentBB(double epsilon = 1e-3, size_t n_to_restart = 100, int max_iters = 1000, size_t batch = 0); + + /** + * Deallocates the instance + */ + LIB4NEURO_API ~GradientDescentBB(); + + /** + * + * @param ef + */ + LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override; + + /** + * + * @return + */ + LIB4NEURO_API std::vector<double> *get_parameters() override; + }; + +} + + +#endif //LIB4NEURO_GRADIENTDESCENTBB_H diff --git a/src/LearningMethods/GradientDescentSingleItem.cpp b/src/LearningMethods/GradientDescentSingleItem.cpp new file mode 100644 index 00000000..504ac99c --- /dev/null +++ b/src/LearningMethods/GradientDescentSingleItem.cpp @@ -0,0 +1,106 @@ +/** + * DESCRIPTION OF THE FILE + * + * @author Michal KravÄŤenko + * @date 19.2.19 - + */ + +#include "GradientDescentSingleItem.h" + +#include <random.hpp> +#include "message.h" + +namespace lib4neuro { + GradientDescentSingleItem::GradientDescentSingleItem(double epsilon, size_t n_to_restart, int max_iters, size_t batch) { + this->tolerance = epsilon; + this->restart_frequency = n_to_restart; + this->optimal_parameters = new std::vector<double>(0); + this->maximum_niters = max_iters; + this->batch = batch; + } + + GradientDescentSingleItem::~GradientDescentSingleItem() { + if (this->optimal_parameters) { + delete this->optimal_parameters; + this->optimal_parameters = nullptr; + } + } + + + double 
GradientDescentSingleItem::get_optimal_step_size(lib4neuro::ErrorFunction &f, std::vector<double> &x, + std::vector<double> &d, size_t n_elems) { + + double alpha = 10.0 / n_elems; + alpha = 1.0; + double value = f.eval(); + double value_shifted = value + 1.0; + + + std::vector<double> shifted_x(x); + while( value_shifted > value ){ + alpha *= 0.5; + + for( size_t i = 0; i < x.size(); ++i ){ + shifted_x[ i ] = x [ i ] - alpha * d[ i ]; + } + + value_shifted = f.eval( &shifted_x ); + } +// std::cout << "Error reduction: " << value - value_shifted << std::endl; + return alpha; + } + + + void GradientDescentSingleItem::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) { + + COUT_INFO("Finding a solution via a Gradient Descent [Single Item] method with adaptive step-length..." << std::endl); + COUT_INFO("Initial error: " << ef.eval() << std::endl); + + size_t total_elements = ef.get_dataset()->get_n_elements(), updated_elements = 0, iter = 0; + double max_error = 1.0, error, gamma; + size_t iter_idx = this->maximum_niters; + size_t dim = ef.get_network_instance()->get_n_biases() + ef.get_network_instance()->get_n_weights(); + + std::vector<double> parameter_vector = *ef.get_parameters(); + std::vector<double> gradient_vector(dim); + std::vector<double> search_direction(dim); + std::vector<double> error_vector(ef.get_network_instance()->get_n_outputs()); + while( max_error >= this->tolerance && iter_idx >= 1 ){ + iter_idx--; + iter++; + + max_error = 0.0; + updated_elements = 0; + std::fill(search_direction.begin(), search_direction.end(), 0); + for( size_t i = 0; i < ef.get_dataset()->get_n_elements(); ++i){ + error = ef.eval_single_item_by_idx( i, ¶meter_vector, error_vector ); + + if( error > max_error ){ + max_error = error; + } + + if( error > this->tolerance ){ + updated_elements++; + ef.calculate_error_gradient_single(error_vector, gradient_vector); + + for(size_t j = 0; j < dim; ++j ){ + search_direction[ j ] += gradient_vector[ j ]; + } + } + } + gamma = this->get_optimal_step_size(ef, parameter_vector, search_direction, updated_elements); + + for( size_t j = 0; j < dim; ++j ){ + parameter_vector[ j ] -= gamma * search_direction[ j ]; + } + + COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << "\r"); + } + COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. 
error: " << max_error << std::endl); + } + + std::vector<double> *GradientDescentSingleItem::get_parameters() { + return this->optimal_parameters; + } + +} diff --git a/src/LearningMethods/GradientDescentSingleItem.h b/src/LearningMethods/GradientDescentSingleItem.h new file mode 100644 index 00000000..73ebf17b --- /dev/null +++ b/src/LearningMethods/GradientDescentSingleItem.h @@ -0,0 +1,106 @@ +/** + * DESCRIPTION OF THE FILE + * + * @author Michal KravÄŤenko + * @date 19.2.19 - + */ + +#ifndef LIB4NEURO_GRADIENTDESCENTSINGLEITEM_H +#define LIB4NEURO_GRADIENTDESCENTSINGLEITEM_H + + +#include "../settings.h" +#include "../constants.h" +#include "ILearningMethods.h" +#include "../ErrorFunction/ErrorFunctions.h" +#include "GradientDescentBB.h" + +namespace lib4neuro { + /** + * + */ + class GradientDescentSingleItem : public ILearningMethods { + + private: + + /** + * Threshold for the successful ending of the optimization - deviation from minima + */ + double tolerance; + + /** + * + */ + double max_error; + + /** + * Number of iterations to reset step size to tolerance/10.0 + */ + size_t restart_frequency; + + /** + * + */ + size_t batch; + + /** + * + */ + size_t iter_max; + + /** + * Maximal number of iterations - optimization will stop after that, even if not converged + */ + long long int maximum_niters; + + /** + * Vector of minima coordinates + */ + std::vector<double> *optimal_parameters; + + + protected: + + /** + * Finds the optimal value of step-length in direction @d from position @x of function @f + * @param f + * @param x + * @param d + * @param n_elems + * @return + */ + virtual double get_optimal_step_size(lib4neuro::ErrorFunction &f, std::vector<double> &x, std::vector<double> &d, size_t n_elems); + + + public: + + /** + * Creates an instance of Gradient Descent Optimizer (i.e. 
back-propagation) + * @param epsilon Threshold for the successful ending of the optimization - deviation from minima + * @param n_to_restart Number of iterations to reset step size to tolerance/10.0 + * @param max_iters Maximal number of iterations - optimization will stop after that, even if not converged + */ + LIB4NEURO_API explicit GradientDescentSingleItem(double epsilon = 1e-3, size_t n_to_restart = 100, int max_iters = 1000, size_t batch = 0); + + /** + * Deallocates the instance + */ + LIB4NEURO_API ~GradientDescentSingleItem(); + + /** + * + * @param ef + */ + LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override; + + /** + * + * @return + */ + LIB4NEURO_API std::vector<double> *get_parameters() override; + }; + +} + + +#endif //LIB4NEURO_GRADIENTDESCENTSINGLEITEM_H diff --git a/src/LearningMethods/ILearningMethods.cpp b/src/LearningMethods/ILearningMethods.cpp index d0bb4b1c..6aa47daf 100644 --- a/src/LearningMethods/ILearningMethods.cpp +++ b/src/LearningMethods/ILearningMethods.cpp @@ -5,4 +5,5 @@ * @date 10.9.18 - */ -#include "ILearningMethods.h" \ No newline at end of file +#include "ILearningMethods.h" + diff --git a/src/LearningMethods/ILearningMethods.h b/src/LearningMethods/ILearningMethods.h index d0009c9b..80c1a939 100644 --- a/src/LearningMethods/ILearningMethods.h +++ b/src/LearningMethods/ILearningMethods.h @@ -27,13 +27,15 @@ private: */ lib4neuro::ErrorFunction *ef = nullptr; + + public: /** * Runs the method specific learning algorithm minimizing the given error function */ virtual void optimize( lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr ) = 0; - /** + /** * Updates the optimal weight&bias settings in the passed vector */ virtual std::vector<double>* get_parameters( ) = 0; diff --git a/src/LearningMethods/LearningSequence.cpp b/src/LearningMethods/LearningSequence.cpp new file mode 100644 index 00000000..c0b786ea --- /dev/null +++ b/src/LearningMethods/LearningSequence.cpp @@ -0,0 +1,61 @@ +/** + * DESCRIPTION OF THE FILE + * + * @author Michal KravÄŤenko + * @date 19.2.19 - + */ + +#include "LearningSequence.h" + +namespace lib4neuro { + + LearningSequence::LearningSequence( double tolerance, int max_n_cycles ){ + this->tol = tolerance; + this->max_number_of_cycles = max_n_cycles; + this->best_parameters = new std::vector<double>(); + } + + LearningSequence::~LearningSequence() { + + + } + + std::vector<double>* LearningSequence::get_parameters() { + if( this->learning_sequence.size() > 0 ){ + return this->learning_sequence[0]->get_parameters( ); + } + return nullptr; + } + + void LearningSequence::add_learning_method(ILearningMethods *method) { + this->learning_sequence.push_back( method ); + } + + void LearningSequence::optimize(lib4neuro::ErrorFunction &ef, std::ofstream *ofs) { + + double error = ef.eval(); + double the_best_error = error; + int mcycles = this->max_number_of_cycles, cycle_idx = 0; + + while( error > this->tol && mcycles != 0){ + mcycles--; + cycle_idx++; + + for( auto m: this->learning_sequence ){ + m->optimize( ef, ofs ); + error = ef.eval(); + + if( error < the_best_error ){ + the_best_error = error; + *this->best_parameters = *ef.get_parameters(); + } + + if( error <= this->tol ){ + ef.get_network_instance()->copy_parameter_space( this->best_parameters ); + return; + } + } + COUT_DEBUG( "Cycle: " << cycle_idx << ", the lowest error: " << the_best_error << std::endl ); + } + } +} \ No newline at end of file diff --git a/src/LearningMethods/LearningSequence.h 
b/src/LearningMethods/LearningSequence.h new file mode 100644 index 00000000..ef38950b --- /dev/null +++ b/src/LearningMethods/LearningSequence.h @@ -0,0 +1,80 @@ +/** + * DESCRIPTION OF THE FILE + * + * @author Michal KravÄŤenko + * @date 19.2.19 - + */ + +#ifndef LIB4NEURO_LEARNINGSEQUENCE_H +#define LIB4NEURO_LEARNINGSEQUENCE_H + +#include <4neuro.h> +#include "../settings.h" +#include "../constants.h" +#include "ILearningMethods.h" + +namespace lib4neuro { + /** + * + */ + class LearningSequence : public ILearningMethods { + + private: + + /** + * + */ + std::vector<ILearningMethods*> learning_sequence; + + /** + * + */ + double tol; + + /** + * + */ + std::vector<double> *best_parameters = nullptr; + + /** + * + */ + int max_number_of_cycles = -1; + + + public: + + /** + * + */ + LIB4NEURO_API explicit LearningSequence( double tolerance = 1e-6, int max_n_cycles = -1); + + /** + * Deallocates the instance + */ + LIB4NEURO_API ~LearningSequence(); + + /** + * + * @param ef + * @param ofs + */ + LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override; + + /** + * + * @return + */ + LIB4NEURO_API std::vector<double> *get_parameters() override; + + /** + * + * @param method + */ + LIB4NEURO_API void add_learning_method( ILearningMethods * method ); + }; + +} + + +#endif //LIB4NEURO_LEARNINGSEQUENCE_H diff --git a/src/LearningMethods/ParticleSwarm.cpp b/src/LearningMethods/ParticleSwarm.cpp index a7698056..e8c3154d 100644 --- a/src/LearningMethods/ParticleSwarm.cpp +++ b/src/LearningMethods/ParticleSwarm.cpp @@ -307,7 +307,7 @@ namespace lib4neuro { // for(unsigned int i = 0; i < this->n_particles; ++i){ // this->particle_swarm[i]->print_coordinate(); // } - printf("Initial best value: %10.8f\n", optimal_value); + COUT_INFO("Initial best value: " << optimal_value << std::endl); while (outer_it < this->iter_max) { max_velocity = 0; @@ -402,12 +402,10 @@ namespace lib4neuro { this->determine_optimal_coordinate_and_value(*this->p_min_glob, optimal_value); if (outer_it < this->iter_max) { /* Convergence reached */ - printf("\nFound optimum in %d iterations. Objective function value: %10.8f\n", (int) outer_it, - optimal_value); + COUT_INFO( std::endl << "Found optimum in " << outer_it << " iterations. Objective function value: " << optimal_value << std::endl); } else { /* Maximal number of iterations reached */ - printf("\nMax number of iterations reached (%d)! Objective function value: %10.8f\n", (int) outer_it, - optimal_value); + COUT_INFO( std::endl << "Max number of iterations reached (" << outer_it << ")! Objective function value: " << optimal_value <<std:: endl); } // for (size_t i = 0; i <= this->func_dim - 1; ++i) { // printf("%10.8f \n", (*this->p_min_glob)[i]); -- GitLab
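
A short note on the step-length rule in GradientDescentBB::optimize above: outside of the restart iterations the step gamma is computed from the two most recent iterates and gradients as gamma = 0.25 * (s.s) / (s.y), with s = x_k - x_{k-1} and y = g_k - g_{k-1}, i.e. a damped form of the "long" Barzilai-Borwein step, which the BB suffix presumably refers to. Below is a minimal standalone sketch of that rule; it is not part of the commit, and the function and variable names are illustrative rather than taken from the patch.

#include <cstddef>
#include <cmath>
#include <vector>

/* Damped "long" Barzilai-Borwein step, mirroring the d1/d2 computation
 * in GradientDescentBB::optimize. All names here are illustrative. */
double bb_step(const std::vector<double> &x_prev, const std::vector<double> &x_curr,
               const std::vector<double> &g_prev, const std::vector<double> &g_curr,
               double damping = 0.25) {
    double d1 = 0.0; /* s.s, where s = x_curr - x_prev */
    double d2 = 0.0; /* s.y, where y = g_curr - g_prev */
    for (std::size_t i = 0; i < x_curr.size(); ++i) {
        double s = x_curr[i] - x_prev[i];
        double y = g_curr[i] - g_prev[i];
        d1 += s * s;
        d2 += s * y;
    }
    /* fall back to a unit step when the denominator vanishes, as the patch does */
    return (std::fabs(d2) > 0.0) ? damping * (d1 / d2) : 1.0;
}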
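
The new LearningSequence class cycles through a user-supplied list of ILearningMethods instances, re-evaluating the error function after each method and stopping early once the error drops below the tolerance or the cycle limit is exhausted. The following sketch is not part of the commit; it only illustrates how the new classes could be combined, using the constructors and methods introduced in this patch. The error function ef (e.g. an MSE built over a network and a data set elsewhere in lib4neuro), the include paths and the concrete parameter values are assumptions for illustration.

#include "GradientDescentBB.h"
#include "GradientDescentSingleItem.h"
#include "LearningSequence.h"

/* Hypothetical helper: 'ef' is any lib4neuro::ErrorFunction constructed elsewhere. */
void train_with_sequence(lib4neuro::ErrorFunction &ef) {
    /* Barzilai-Borwein gradient descent: tolerance, restart frequency, max. iterations, batch size */
    lib4neuro::GradientDescentBB gd_bb(1e-6, 500, 10000, 0);

    /* per-item gradient descent used as a polishing stage */
    lib4neuro::GradientDescentSingleItem gd_single(1e-6, 500, 1000, 0);

    /* cycle through the two methods, at most 5 full cycles, target error 1e-6 */
    lib4neuro::LearningSequence sequence(1e-6, 5);
    sequence.add_learning_method(&gd_bb);
    sequence.add_learning_method(&gd_single);

    /* stops early (and copies the best parameters into the network)
     * once the error drops below the tolerance */
    sequence.optimize(ef);
}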