    /**
     * DESCRIPTION OF THE FILE
     *
     * @author Michal Kravčenko
     * @date 30.7.18 -
     */
    
    #ifndef INC_4NEURO_GRADIENTDESCENT_H
    #define INC_4NEURO_GRADIENTDESCENT_H
    
    
    #include "../settings.h"
    
    #include "../constants.h"
    
    #include "../ErrorFunction/ErrorFunctions.h"
    
    namespace lib4neuro {
    
        class GradientDescent : public GradientLearningMethod {
    
        private:
    
            /**
             * Threshold for the successful ending of the optimization - deviation from minima
             */
            double tolerance;
    
            /**
             * Number of iterations to reset step size to tolerance/10.0
             */
            size_t restart_frequency;
    
            /**
             * Size of the batch used when evaluating the error function gradient
             * (0 is assumed to mean the entire training set)
             */
            size_t batch;
    
            /**
             * Maximal number of iterations - optimization will stop after that, even if not converged
             */
            long long int maximum_niters;
    
    
            /**
             * Vector of minima coordinates
             */
            std::vector<double> optimal_parameters;
    
            /**
             * Adaptive calculation of the step-size based on several historical characteristics.
             * ----------------------------------------------------------------------------------
             * If the current error @fi is larger than the error in the previous step @fim, the rate of step-size change decreases (the algorithm is moving in that direction too quickly).
             * Otherwise the rate of step-size change increases (the algorithm is on the right path, so we can attempt to push through more rapidly).
             * ----------------------------------------------------------------------------------
             * The step size is then calculated via: @c^(1-2@beta) * (@grad_norm_prev/@grad_norm)^(1/@c)
             * If the previous gradient norm is lower than the current gradient norm, the step-size decreases (we probably took too large a step).
             * Otherwise it increases (we are likely on the right track and can try to speed up the convergence).
             * A worked numerical example follows the declaration below.
             *
             * @param gamma[in, out] the step size used in the last iteration
             * @param beta[in] a number in the interval [0, 1] measuring the direction change between the two last steps; 0: no change, 1: opposite directions
             * @param c[in, out] greater than zero; a measure of the non-linear step-size change - the higher @c is, the more rapidly the step-size increases/decreases
             * @param grad_norm_prev[in] gradient norm of the error in the previous iteration
             * @param grad_norm[in] gradient norm of the error in the current iteration
             * @param fi[in] value of the error
             * @param fim[in] value of the error in the previous iteration
             */
            virtual void eval_step_size_mk(double &gamma,
                                           double beta,
                                           double &c,
                                           double grad_norm_prev,
                                           double grad_norm,
                                           double fi,
                                           double fim);
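
            /*
             * Worked example of the formula above (an illustration only; it does not claim
             * to reproduce the exact body of eval_step_size_mk). With c = 2 and beta = 1
             * (the last two steps pointed in opposite directions) the first factor is
             * c^(1 - 2*beta) = 2^(-1) = 0.5; if grad_norm_prev == grad_norm, the second
             * factor is 1^(1/2) = 1, so the formula yields 0.5, i.e. a smaller step.
             * With beta = 0 (no direction change) the same setup yields 2^(1) * 1 = 2,
             * i.e. a larger step, matching the description in the comment above.
             */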
    
    
            /**
             * Analyses the direction of the parameter change and performs the most feasible step in one parameter
             * (a minimal sketch of the implied step relation follows the declaration below)
             * @param ef[in] error function to be optimized
             * @param error_previous[in] evaluation of the error function on the @parameters_before state
             * @param step_coefficient[in] scalar value denoting the scaling of the step in one direction
             * @param direction[in] direction vector to be analyzed
             * @param parameters_before[in] state of the parameter space before analysis
             * @param parameters_after[out] suggested state of the parameters after the analysis completes
             */
            virtual bool perform_feasible_1D_step(
                    lib4neuro::ErrorFunction &ef,
                    double error_previous,
                    double step_coefficient,
                    std::vector<double> * direction,
                    std::vector<double> *parameters_before,
                    std::vector<double> *parameters_after
                    );
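
            /*
             * A minimal sketch of the step relation implied by the parameters above (an
             * assumption about the interface, not the actual implementation): for the single
             * analysed coordinate i,
             *
             *     (*parameters_after)[i] = (*parameters_before)[i]
             *                              + step_coefficient * (*direction)[i];
             *
             * with the remaining coordinates copied from *parameters_before, and the return
             * value indicating whether such a step is feasible (presumably, whether it
             * improves upon error_previous).
             */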
    
    
        public:

            /**
             * Creates an instance of Gradient Descent Optimizer (i.e. back-propagation);
             * see the usage sketch at the end of this class
             * @param epsilon Threshold for the successful ending of the optimization - deviation from minima
             * @param n_to_restart Number of iterations to reset step size to tolerance/10.0
             * @param max_iters Maximal number of iterations - optimization will stop after that, even if not converged
             * @param batch Size of the batch used for the gradient evaluation (0 is assumed to mean the entire training set)
             */
            LIB4NEURO_API explicit GradientDescent(double epsilon = 1e-3, size_t n_to_restart = 100, int max_iters = 1000, size_t batch = 0);
    
            /**
             * Deallocates the instance
             */
            LIB4NEURO_API ~GradientDescent();
    
            /**
             * Runs the gradient descent optimization of the supplied error function
             * @param ef[in] error function to be optimized
             * @param ofs[in] optional output file stream (presumably used for logging the optimization progress)
             */
            LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override;
    
            /**
             * @return parameters corresponding to the minimum found by the optimization
             */
            LIB4NEURO_API std::shared_ptr<std::vector<double>> get_parameters() override;
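
            /*
             * Usage sketch (an illustration, not taken from the original header; the
             * ErrorFunction instance `mse` is assumed to be constructed elsewhere):
             *
             *     lib4neuro::GradientDescent gd(1e-4, 200, 10000, 32);
             *     gd.optimize(mse);                   // mse is a lib4neuro::ErrorFunction
             *     auto params = gd.get_parameters();  // coordinates of the found minimum
             */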
    
    
        };

    }

    #endif //INC_4NEURO_GRADIENTDESCENT_H