Skip to content
Snippets Groups Projects
GradientDescent.h 11.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • /**
     * DESCRIPTION OF THE FILE
     *
     * @author Michal Kravčenko
     * @date 30.7.18 -
     */
    
    #ifndef INC_4NEURO_GRADIENTDESCENT_H
    #define INC_4NEURO_GRADIENTDESCENT_H
    
    
    #include "ILearningMethods.h"
    #include "../ErrorFunction/ErrorFunctions.h"
    
    class GradientDescent: public ILearningMethods {
    
    private:
    
        /**
         *
         */
        double tolerance;
    
        /**
         *
         */
        size_t restart_frequency;
    
        std::vector<double> *optimal_parameters;
    
        /**
         *
         * @param gamma
         * @param beta
         * @param c
         * @param grad_norm_prev
         * @param grad_norm
         * @param fi
         * @param fim
         */
         virtual void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim );
        double eval_f(double x){
            return std::pow(E, -2.0 * x) * (3.0 * x + 1.0);
        }
    
        double eval_df(double x){
            return std::pow(E, -2.0 * x) * (1.0 - 6.0 * x);
        }
    
        double eval_ddf(double x){
            return 4.0 * std::pow(E, -2.0 * x) * (3.0 * x - 2.0);
        }
    
        double eval_approx_f(double x, size_t n_inner_neurons, std::vector<double> &parameters){
            double value= 0.0, wi, ai, bi, ei, ei1;
            for(size_t i = 0; i < n_inner_neurons; ++i){
    
                wi = parameters[3 * i];
                ai = parameters[3 * i + 1];
                bi = parameters[3 * i + 2];
    
                ei = std::pow(E, bi - wi * x);
                ei1 = ei + 1.0;
    
                value += ai / (ei1);
            }
            return value;
        }
    
        double eval_approx_df(double x, size_t n_inner_neurons, std::vector<double> &parameters){
            double value= 0.0, wi, ai, bi, ei, ei1;
            for(size_t i = 0; i < n_inner_neurons; ++i){
    
                wi = parameters[3 * i];
                ai = parameters[3 * i + 1];
                bi = parameters[3 * i + 2];
    
                ei = std::pow(E, bi - wi * x);
                ei1 = ei + 1.0;
    
                value += ai * wi * ei / (ei1 * ei1);
            }
    
            return value;
        }
    
        double eval_approx_ddf(double x, size_t n_inner_neurons, std::vector<double> &parameters){
            double value= 0.0, wi, ai, bi, ewx, eb;
    
            for(size_t i = 0; i < n_inner_neurons; ++i){
    
                wi = parameters[3 * i];
                ai = parameters[3 * i + 1];
                bi = parameters[3 * i + 2];
    
    
                eb = std::pow(E, bi);
                ewx = std::pow(E, wi * x);
    
                value += -(ai*wi*wi*eb*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx));
            }
    
            return value;
        }
    
    //NN partial derivative (wi): (ai * x * e^(bi - wi * x)) * (e^(bi - wi * x) + 1)^(-2)
        double eval_approx_dw_f(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, ai, bi, ei, ei1;
            wi = parameters[3 * neuron_idx];
            ai = parameters[3 * neuron_idx + 1];
            bi = parameters[3 * neuron_idx + 2];
    
            ei = std::pow(E, bi - wi * x);
            ei1 = ei + 1.0;
    
            return (ai * x * ei) / (ei1 * ei1);
        }
    
    //dNN partial derivative (wi): -(a w x e^(b - w x))/(e^(b - w x) + 1)^2 + (2 a w x e^(2 b - 2 w x))/(e^(b - w x) + 1)^3 + (a e^(b - w x))/(e^(b - w x) + 1)^2
        double eval_approx_dw_df(double x, size_t neuron_idx, std::vector<double> &parameters){
    
            double wi, ai, bi, ei, ei1;
    
            wi = parameters[3 * neuron_idx];
            ai = parameters[3 * neuron_idx + 1];
            bi = parameters[3 * neuron_idx + 2];
    
            ei = std::pow(E, bi - wi * x);
            ei1 = ei + 1.0;
    
            return -(ai * wi * x * ei)/(ei1 * ei1) + (2.0*ai*wi*x*ei*ei)/(ei1 * ei1 * ei1) + (ai* ei)/(ei1 * ei1);
        }
    
    //ddNN partial derivative (wi): -(a w^2 x e^(b + 2 w x))/(e^b + e^(w x))^3 - (a w^2 x e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3 + (3 a w^2 x e^(b + 2 w x) (e^(w x) - e^b))/(e^b + e^(w x))^4 - (2 a w e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3
        double eval_approx_dw_ddf(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, ai, bi, eb, ewx;
    
            wi = parameters[3 * neuron_idx];
            ai = parameters[3 * neuron_idx + 1];
            bi = parameters[3 * neuron_idx + 2];
    
            eb = std::pow(E, bi);
            ewx = std::pow(E, wi * x);
    
            return  -(ai*wi*wi* x * eb*ewx*ewx)/((eb + ewx)*(eb + ewx)*(eb + ewx)) - (ai*wi*wi*x*eb*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)) + (3*ai*wi*wi*x*eb*ewx*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)*(eb + ewx)) - (2*ai*wi*eb*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx));
        }
    
    //NN partial derivative (ai): (1 + e^(-x * wi + bi))^(-1)
        double eval_approx_da_f(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, bi, ei, ei1;
    
            wi = parameters[3 * neuron_idx];
            bi = parameters[3 * neuron_idx + 2];
    
            ei = std::pow(E, bi - wi * x);
            ei1 = ei + 1.0;
    
            return 1.0 / ei1;
        }
    
    //dNN partial derivative (ai): (w e^(b - w x))/(e^(b - w x) + 1)^2
        double eval_approx_da_df(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, bi, ei, ei1;
    
            wi = parameters[3 * neuron_idx];
            bi = parameters[3 * neuron_idx + 2];
    
            ei = std::pow(E, bi - wi * x);
            ei1 = ei + 1.0;
    
            return (wi*ei)/(ei1 * ei1);
        }
    
    //ddNN partial derivative (ai):  -(w^2 e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3
        double eval_approx_da_ddf(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, bi, eip, ewx, eb;
    
            wi = parameters[3 * neuron_idx];
            bi = parameters[3 * neuron_idx + 2];
    
            eip = std::pow(E, bi + wi * x);
            eb = std::pow(E, bi);
            ewx = std::pow(E, wi * x);
    
            return -(wi*wi*eip*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx));
        }
    
    //NN partial derivative (bi): -(ai * e^(bi - wi * x)) * (e^(bi - wi * x) + 1)^(-2)
        double eval_approx_db_f(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, bi, ei, ai, ei1;
            wi = parameters[3 * neuron_idx];
            ai = parameters[3 * neuron_idx + 1];
            bi = parameters[3 * neuron_idx + 2];
    
            ei = std::pow(E, bi - wi * x);
            ei1 = ei + 1.0;
    
            return -(ai * ei)/(ei1 * ei1);
        }
    
    //dNN partial derivative (bi): (a w e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3
        double eval_approx_db_df(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, bi, ai, ewx, eb;
    
            wi = parameters[3 * neuron_idx];
            ai = parameters[3 * neuron_idx + 1];
            bi = parameters[3 * neuron_idx + 2];
    
            eb = std::pow(E, bi);
            ewx = std::pow(E, wi*x);
    
            return (ai* wi* eb*ewx* (ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx));
        }
    
    //ddNN partial derivative (bi): -(a w^2 e^(b + w x) (-4 e^(b + w x) + e^(2 b) + e^(2 w x)))/(e^b + e^(w x))^4
        double eval_approx_db_ddf(double x, size_t neuron_idx, std::vector<double> &parameters){
            double wi, bi, ai, ewx, eb;
    
            wi = parameters[3 * neuron_idx];
            ai = parameters[3 * neuron_idx + 1];
            bi = parameters[3 * neuron_idx + 2];
    
            eb = std::pow(E, bi);
            ewx = std::pow(E, wi*x);
    
            return -(ai* wi*wi* eb*ewx* (-4.0* eb*ewx + eb*eb + ewx*ewx))/((eb +ewx)*(eb +ewx)*(eb +ewx)*(eb +ewx));
        }
    
        double eval_error_function(std::vector<double> &parameters, size_t n_inner_neurons, std::vector<double> test_points){
    
            double output = 0.0, approx, frac = 1.0 / (test_points.size());
    
            for(auto x: test_points){
                /* governing equation */
                approx = 4.0 * eval_approx_f(x, n_inner_neurons, parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, parameters) + eval_approx_ddf(x, n_inner_neurons, parameters);
    
                output += (0.0 - approx) * (0.0 - approx) * frac;
    
            }
    
            /* BC */
            approx = eval_approx_f(0.0, n_inner_neurons, parameters);
            output += (1.0 - approx) * (1.0 - approx);
    
            approx = eval_approx_df(0.0, n_inner_neurons, parameters);
            output += (1.0 - approx) * (1.0 - approx);
    
    
            return output;
        }
    
        void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){
    
            if(val > prev_val){
                gamma *= 0.99999;
            }
    
            if(sk <= 1e-3 || grad_norm < grad_norm_prev){
                /* movement on a line */
                /* new slope is less steep, speed up */
                gamma *= 1.0005;
            }
            else if(grad_norm > grad_norm_prev){
                /* new slope is more steep, slow down*/
                gamma /= 1.0005;
            }
            else{
                gamma /= 1.005;
            }
    //        gamma *= 0.999999;
        }
    
    
        double calculate_gradient( std::vector<std::pair<std::vector<double>, std::vector<double>>> *dataset, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){
            size_t i, j;
            double x,  mem, derror, total_error, approx;
    
            std::vector<double> data_points(dataset->size());
            for( i = 0; i < dataset->size(); ++i){
                data_points[ i ] = dataset->at( i ).first[0];
            }
    
            std::vector<double> parameters_analytical(parameters->size());
            for(i = 0; i < n_inner_neurons; ++i){
                parameters_analytical[3 * i + 0] = parameters->at(0 * n_inner_neurons + i);
                parameters_analytical[3 * i + 1] = parameters->at(1 * n_inner_neurons + i);
                parameters_analytical[3 * i + 2] = parameters->at(2 * n_inner_neurons + i);
            }
            size_t train_size = data_points.size();
    
            /* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */
            x = 0.0;
            mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters));
            derror = 2.0 * mem;
            total_error = mem * mem;
            for(i = 0; i < n_inner_neurons; ++i){
                (*gradient)[i] -= derror * eval_approx_dw_f(x, i, *parameters);
                (*gradient)[i + n_inner_neurons] -= derror * eval_approx_da_f(x, i, *parameters);
                (*gradient)[i + 2*n_inner_neurons] -= derror * eval_approx_db_f(x, i, *parameters);
            }
    
            /* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */
            mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters));
            derror = 2.0 * mem;
            total_error += mem * mem;
            for(i = 0; i < n_inner_neurons; ++i){
                (*gradient)[i] -= derror * eval_approx_dw_df(x, i, *parameters);
                (*gradient)[i + n_inner_neurons] -= derror * eval_approx_da_df(x, i, *parameters);
                (*gradient)[i + 2*n_inner_neurons] -= derror * eval_approx_db_df(x, i, *parameters);
            }
    
            for(j = 0; j < data_points.size(); ++j){
                x = data_points[j];
                /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
                approx= eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters);
                mem = 0.0 - approx;
                derror = 2.0 * mem / train_size;
                for(i = 0; i < n_inner_neurons; ++i){
                    (*gradient)[i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters));
                    (*gradient)[i + n_inner_neurons] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters));
                    (*gradient)[i + 2*n_inner_neurons] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters));
                }
                total_error += mem * mem / train_size;
            }
    
            return total_error;
        }
    
    public:
    
        /**
         *
         * @param epsilon
         */
        GradientDescent( double epsilon = 1e-3, size_t n_to_restart = 100 );
    
        /**
         *
         */
        ~GradientDescent();
    
        /**
         *
         * @param ef
         */
        virtual void optimize( ErrorFunction &ef );
    
        /**
         *
         * @return
         */
        virtual std::vector<double>* get_parameters( );
    
    
    
    };
    
    
    #endif //INC_4NEURO_GRADIENTDESCENT_H