Newer
Older
/**
* Declaration of the GradientDescent class - a gradient-based learning method with an adaptively calculated step size.
*
* @author Michal Kravčenko
* @date 30.7.18 -
*/
#ifndef INC_4NEURO_GRADIENTDESCENT_H
#define INC_4NEURO_GRADIENTDESCENT_H
#include "../constants.h"
Martin Beseda
committed
#include "LearningMethod.h"

Michal Kravcenko
committed
#include "../ErrorFunction/ErrorFunctions.h"
Martin Beseda
committed
/**
* Gradient Descent optimizer (i.e. back-propagation) derived from GradientLearningMethod.
*/
Martin Beseda
committed
class GradientDescent : public GradientLearningMethod {
* Threshold for the successful ending of the optimization - deviation from minima
* Number of iterations to reset step size to tolerance/10.0
/**
* Maximum number of iterations; the optimization stops once this count is reached, even if it has not converged.
*/
long long int maximum_niters;
/**
* Vector of minima coordinates
*/

Michal Kravcenko
committed
* Adaptive calculation of the step-size based on several historical characteristics.
* ----------------------------------------------------------------------------------
* If the current error @fi is larger than the error in the previous step @fim, the rate of step-size change decreases (the algorithm is going in the direction too quickly)
* Otherwise the rate of step-size change increases (the algorithm is on the right path, so we can attempt to push through more rapidly)
* ----------------------------------------------------------------------------------
* The step size is then calculated via: @c^(1-2@beta) * (@grad_norm_prev/@grad_norm)^(1/@c)
* If the previous gradient norm is lower than the current gradient norm, then the step-size decreases (as we probably took too large a step)
* Otherwise it increases (as we are likely on the right track, we can try to speed-up the convergence)

Michal Kravcenko
committed
* @param gamma[in, out] a step size used in the last iteration
* @param beta[in] a number in the interval [0, 1]. it represents a measure of direction change between two last steps, 0: no change, 1:opposite directions
* @param c[in, out] greater than zero. it is a measure of a non-linear step-size change. the higher @c is, the more rapidly the step-size increases/decreases
* @param grad_norm_prev[in] gradient norm of the error in the previous iteration
* @param grad_norm[in] gradient norm of the error in the current iteration
* @param fi[in] value of the error
* @param fim[in] value of the error in the previous iteration
Martin Beseda
committed
eval_step_size_mk(double &gamma,
double beta,
double &c,
double grad_norm_prev,
double grad_norm,
double fi,

Michal Kravcenko
committed
/**
* Analyses the direction of the parameter change and performs the most feasible step in one parameter.
*
* @param ef[in] error function to be optimized
* @param error_previous[in] value of the error function evaluated at the @parameters_before state
* @param step_coefficient[in] scalar value denoting the scaling of the step in one direction
* @param direction[in] direction vector to be analyzed
* @param parameters_before[in] state of the parameter space before the analysis
* @param parameters_after[out] suggested state of the parameters after the analysis completes
* @return NOTE(review): the meaning of the boolean result is not documented here; presumably it reports whether a feasible step was found - TODO confirm against the implementation
*/
virtual bool perform_feasible_1D_step(
lib4neuro::ErrorFunction &ef,
double error_previous,
double step_coefficient,
std::vector<double> * direction,
std::vector<double> *parameters_before,
std::vector<double> *parameters_after
);
* Creates an instance of Gradient Descent Optimizer (i.e. back-propagation)
* @param epsilon Threshold for the successful ending of the optimization - deviation from minima
* @param n_to_restart Number of iterations to reset step size to tolerance/10.0
* @param max_iters Maximal number of iterations - optimization will stop after that, even if not converged
Martin Beseda
committed
// NOTE(review): the `batch` parameter has no description; presumably a batch size where 0 selects the whole data set - TODO confirm.
// NOTE(review): `max_iters` is declared as `int`, whereas the `maximum_niters` member is a `long long int`.
LIB4NEURO_API explicit GradientDescent(double epsilon = 1e-3, size_t n_to_restart = 100, int max_iters = 1000, size_t batch = 0);
* Deallocates the instance
*/
LIB4NEURO_API ~GradientDescent();
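/**
* Runs the optimization of the given error function.
* @param ef error function to be optimized
* @param ofs optional output file stream, nullptr by default (NOTE(review): presumably receives progress output - TODO confirm)
*/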
Martin Beseda
committed
LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override;
/**
* Returns a shared pointer to a vector of doubles.
* NOTE(review): presumably the parameter values found by the optimization - TODO confirm against the implementation.
*/
LIB4NEURO_API std::shared_ptr<std::vector<double>> get_parameters() override;
#endif //INC_4NEURO_GRADIENTDESCENT_H