/**
 * Declaration of the gradient descent (back-propagation) learning method
 *
 * @author Michal Kravčenko
 * @date 30.7.18
 */

#ifndef INC_4NEURO_GRADIENTDESCENT_H
#define INC_4NEURO_GRADIENTDESCENT_H

#include "../settings.h"
#include "../constants.h"
#include "LearningMethod.h"
#include "../ErrorFunction/ErrorFunctions.h"

namespace lib4neuro {

    /**
     * Gradient descent optimization method (i.e. back-propagation)
     */
    class GradientDescent : public GradientLearningMethod {

    private:

        /**
         * Threshold for the successful ending of the optimization - deviation from minima
         */
        double tolerance;

        /**
         * Number of iterations after which the step size is reset to tolerance/10.0
         */
        size_t restart_frequency;

        /**
         * Size of the batch used to evaluate the gradient (0: the whole data set is used)
         */
        size_t batch;

        /**
         * Maximal number of iterations - the optimization stops afterwards, even if not converged
         */
        long long int maximum_niters;

        /**
         * Vector of minima coordinates
         */
        std::vector<double> optimal_parameters;

        /**
         * Adaptive calculation of the step size based on several historical characteristics.
         * ----------------------------------------------------------------------------------
         * If the current error @fi is larger than the error in the previous step @fim, the
         * rate of step-size change decreases (the algorithm is moving in this direction too
         * quickly); otherwise the rate of step-size change increases (the algorithm is on
         * the right path, so we can attempt to push through more rapidly).
         * ----------------------------------------------------------------------------------
         * The step size is then calculated as: @c^(1-2@beta) * (@grad_norm_prev/@grad_norm)^(1/@c)
         * If the previous gradient norm is lower than the current gradient norm, the step size
         * decreases (as we probably took too large a step); otherwise it increases (as we are
         * likely on the right track and can try to speed up the convergence).
         *
         * @param gamma[in, out] step size used in the last iteration
         * @param beta[in] number in the interval [0, 1] measuring the direction change between the two last steps; 0: no change, 1: opposite directions
         * @param c[in, out] number greater than zero measuring the non-linear step-size change; the higher @c is, the more rapidly the step size increases/decreases
         * @param grad_norm_prev[in] gradient norm of the error in the previous iteration
         * @param grad_norm[in] gradient norm of the error in the current iteration
         * @param fi[in] value of the error in the current iteration
         * @param fim[in] value of the error in the previous iteration
         */
        virtual void eval_step_size_mk(double &gamma,
                                       double beta,
                                       double &c,
                                       double grad_norm_prev,
                                       double grad_norm,
                                       double fi,
                                       double fim);
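        /*
         * Illustrative sketch of the update rule documented above (not part of
         * the class interface; assumes <cmath>). The new step size would be
         * computed roughly as
         *
         *   gamma = std::pow(c, 1.0 - 2.0 * beta)
         *         * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
         *
         * so that for c > 1 a large direction change (beta > 0.5) shrinks the
         * step, while a falling gradient norm (grad_norm < grad_norm_prev)
         * enlarges it.
         */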
        /**
         * Analyzes the direction of the parameter change and performs the most feasible step in one parameter
         * @param ef[in] error function to be optimized
         * @param error_previous[in] value of the error function in the @parameters_before state
         * @param step_coefficient[in] scalar value scaling the step in one direction
         * @param direction[in] direction vector to be analyzed
         * @param parameters_before[in] state of the parameter space before the analysis
         * @param parameters_after[out] suggested state of the parameters after the analysis completes
         */
        virtual bool perform_feasible_1D_step(lib4neuro::ErrorFunction &ef,
                                              double error_previous,
                                              double step_coefficient,
                                              std::vector<double> *direction,
                                              std::vector<double> *parameters_before,
                                              std::vector<double> *parameters_after);

    public:

        /**
         * Creates an instance of the Gradient Descent optimizer (i.e. back-propagation)
         * @param epsilon Threshold for the successful ending of the optimization - deviation from minima
         * @param n_to_restart Number of iterations after which the step size is reset to tolerance/10.0
         * @param max_iters Maximal number of iterations - the optimization stops afterwards, even if not converged
         * @param batch Size of the batch used to evaluate the gradient (0: the whole data set is used)
         */
        LIB4NEURO_API explicit GradientDescent(double epsilon = 1e-3,
                                               size_t n_to_restart = 100,
                                               int max_iters = 1000,
                                               size_t batch = 0);

        /**
         * Deallocates the instance
         */
        LIB4NEURO_API ~GradientDescent();

        /**
         * Minimizes the given error function via gradient descent
         * @param ef error function to be optimized
         * @param ofs optional output stream for logging
         */
        LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef,
                                    std::ofstream *ofs = nullptr) override;

        /**
         * Returns the parameters found by the last optimization run
         * @return vector of the optimal parameters
         */
        LIB4NEURO_API std::shared_ptr<std::vector<double>> get_parameters() override;
    };

}

#endif //INC_4NEURO_GRADIENTDESCENT_H
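/*
 * Usage sketch (illustrative only; assumes an ErrorFunction instance `mse`
 * constructed elsewhere, e.g. via the classes declared in ErrorFunctions.h):
 *
 *   lib4neuro::GradientDescent gd(1e-4, 200, 10000, 32);
 *   gd.optimize(mse);
 *   std::shared_ptr<std::vector<double>> params = gd.get_parameters();
 */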