GradientDescent.cpp
    /**
     * Implementation of the gradient descent optimization method with an adaptive step length.
     *
     * @author Michal Kravčenko
     * @date 30.7.18 -
     */
    
    #include "GradientDescent.h"

    #include <algorithm>    // std::fill
    #include <cmath>        // std::pow, std::sqrt, std::acos
    #include <cstdio>       // printf
    #include <iostream>     // std::cout
    #include <vector>
    
    
    namespace lib4neuro {
        GradientDescent::GradientDescent(double epsilon, size_t n_to_restart) {
            this->tolerance = epsilon;
            this->restart_frequency = n_to_restart;
            this->optimal_parameters = new std::vector<double>(0);
        }
    
        GradientDescent::~GradientDescent() {
            if (this->optimal_parameters) {
                delete this->optimal_parameters;
                this->optimal_parameters = nullptr;
            }
        }
    
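        /*
         * Adjusts the step length gamma in place. The auxiliary coefficient c shrinks when the error
         * increased (fi > fim) and grows when it decreased; gamma is then rescaled using c, the
         * angle-based coefficient beta and the ratio of the previous and current gradient norms.
         */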
        void GradientDescent::eval_step_size_mk(double &gamma, double beta, double &c, double grad_norm_prev,
                                                double grad_norm, double fi, double fim) {
    
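            /* adapt the auxiliary coefficient: shrink c slightly when the error increased, grow it when it decreased */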
            if (fi > fim) {
                c /= 1.0000005;
            } else if (fi < fim) {
                c *= 1.0000005;
            }
    
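            /* gamma_new = gamma * c^(1 - 2*beta) * (||grad_prev|| / ||grad||)^(1/c) */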
            gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
    
        }
    
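        /*
         * Runs the gradient descent iteration until the Euclidean norm of the error gradient drops
         * below the prescribed tolerance. The step length is kept fixed for the first few iterations
         * and at every restart; otherwise it is updated adaptively via eval_step_size_mk.
         */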
        void GradientDescent::optimize(lib4neuro::ErrorFunction &ef) {
    
            std::cout << "Finding a solution via a Gradient Descent method with adaptive step-length" << std::endl;
            std::cout
                    << "********************************************************************************************************************************************"
                    << std::endl;
            double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
            double grad_norm_prev;
            size_t i, iter_idx = 0;
    
            gamma = 1.0;
            double prev_val, val = 0.0, c = 1.25;
    
            size_t n_parameters = ef.get_dimension();
    
    
            std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
            std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
            std::vector<double> *params_current = ef.get_parameters();
            std::vector<double> *params_prev = new std::vector<double>(n_parameters);
            std::vector<double> *ptr_mem;
    
    //    std::vector<double> gradient_mem( n_parameters );
    //    std::vector<double> parameters_analytical( n_parameters );
    
    
            std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
            std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
    
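            /* main loop: iterate until the gradient norm falls below the tolerance */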
            while (grad_norm > this->tolerance) {
                iter_idx++;
                prev_val = val;
                grad_norm_prev = grad_norm;
    
                /* reset of the current gradient */
                std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    //        std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
                ef.calculate_error_gradient(*params_current, *gradient_current);
    //        double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );
    
    //        for(size_t k = 0; k < gradient_mem.size(); ++k){
    //            printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
    //        }
    //        printf("---------------------\n");
    
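                /* Euclidean norm of the current gradient */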
                grad_norm = 0.0;
                for (auto v: *gradient_current) {
                    grad_norm += v * v;
                }
                grad_norm = std::sqrt(grad_norm);
    
                /* Update of the parameters */
                /* step length calculation */
                if (iter_idx < 10 || iter_idx % this->restart_frequency == 0) {
                    /* fixed step length */
                    gamma = 0.1 * this->tolerance;
                } else {
                    /* angle between two consecutive gradients */
                    sx = 0.0;
                    for (i = 0; i < gradient_current->size(); ++i) {
                        sx += (gradient_current->at(i) * gradient_prev->at(i));
                    }
                    sx /= grad_norm * grad_norm_prev;
                    /* clamp to [-1, 1] so rounding errors cannot push the argument of acos out of range (NaN) */
                    if (sx < -1.0) { sx = -1.0; } else if (sx > 1.0) { sx = 1.0; }
                    beta = std::sqrt(std::acos(sx) / lib4neuro::PI);
    
                    eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
                }
    
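                /* descent step: the updated parameters are written into params_prev and the pointers are swapped below */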
                for (i = 0; i < gradient_current->size(); ++i) {
                    (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
                }
    
                /* switcheroo */
                ptr_mem = gradient_prev;
                gradient_prev = gradient_current;
                gradient_current = ptr_mem;
    
                ptr_mem = params_prev;
                params_prev = params_current;
                params_current = ptr_mem;
    
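                /* error value at the freshly updated parameters */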
                val = ef.eval(params_current);
                if (iter_idx % 1 == 0) { /* report progress every iteration; raise the modulus to print less often */
                    printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r",
                           (int) iter_idx, gamma, c, grad_norm, val);
                    std::cout.flush();
                }
            }
            printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\n",
                   (int) iter_idx, gamma, c, grad_norm, val);
            std::cout.flush();
    
    
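            /* store the final parameter vector as the optimization result */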
            *this->optimal_parameters = *params_current;
    
            delete gradient_current;
            delete gradient_prev;
            delete params_current;
            delete params_prev;
        }
    
        std::vector<double> *GradientDescent::get_parameters() {
            return this->optimal_parameters;
        }
    
    }