GradientDescent.cpp
    /**
     * Gradient descent optimizer with an adaptive step-length heuristic.
     *
     * @author Michal Kravčenko
     * @date 30.7.18 -
     */
    
    #include "GradientDescent.h"

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <iostream>
    #include <vector>
    
    
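    /**
     * @param epsilon      tolerance on the Euclidean norm of the gradient; the iteration
     *                     stops once the norm drops below this value
     * @param n_to_restart number of iterations after which the step length is periodically
     *                     reset to a fixed, conservative value
     */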
    GradientDescent::GradientDescent(double epsilon, size_t n_to_restart){
        this->tolerance = epsilon;
        this->restart_frequency = n_to_restart;
        this->optimal_parameters = new std::vector<double>(0);
    }
    
    GradientDescent::~GradientDescent() {
        if( this->optimal_parameters ){
            delete this->optimal_parameters;
            this->optimal_parameters = nullptr;
        }
    }
    
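    /**
     * Updates the step length gamma according to the progress of the last iteration.
     *
     * @param gamma          step length, updated in place
     * @param beta           normalized angle between the current and previous gradients
     * @param c              adaptive scaling coefficient, updated in place (grown when the
     *                       error decreased, shrunk when it increased)
     * @param grad_norm_prev Euclidean norm of the previous gradient
     * @param grad_norm      Euclidean norm of the current gradient
     * @param fi             error value of the current iteration
     * @param fim            error value of the previous iteration
     */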
    void GradientDescent::eval_step_size_mk(double &gamma, double beta, double &c, double grad_norm_prev,
                                              double grad_norm, double fi, double fim ) {
    
        if( fi > fim )
        {
            c /= 1.0000005;
        }
        else if( fi < fim )
        {
            c *= 1.0000005;
        }
    
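        /* gamma is scaled by c^(1 - 2*beta) and by the ratio of the previous to the
         * current gradient norm raised to the power 1/c */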
        gamma *= std::pow( c, 1.0 - 2.0 * beta) * std::pow( grad_norm_prev / grad_norm, 1.0 / c );
    
    }
    
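    /**
     * Iterates x_{k+1} = x_k - gamma_k * grad E(x_k) until the gradient norm of the error
     * function drops below the prescribed tolerance. The step length gamma_k is adapted via
     * eval_step_size_mk() and reset to a fixed value during the first iterations and at
     * every restart.
     */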
    void GradientDescent::optimize( ErrorFunction &ef ) {
    
        std::cout << "Finding a solution via a Gradient Descent method with adaptive step-length" << std::endl;
        std::cout << "********************************************************************************************************************************************" <<std::endl;
        double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
        double grad_norm_prev;
        size_t i, iter_idx = 0;
    
        gamma = 1.0;
        double prev_val, val = 0.0, c = 1.25;
    
        size_t n_parameters = ef.get_dimension();
    
    
        std::vector<double> *gradient_current = new std::vector<double>( n_parameters );
        std::vector<double> *gradient_prev = new std::vector<double>( n_parameters );
        std::vector<double> *params_current = ef.get_parameters( );
        std::vector<double> *params_prev = new std::vector<double>( n_parameters );
        std::vector<double> *ptr_mem;
    
    //    std::vector<double> gradient_mem( n_parameters );
    //    std::vector<double> parameters_analytical( n_parameters );
    
    
        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
        std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
    
        while( grad_norm > this->tolerance ) {
            iter_idx++;
            prev_val = val;
            grad_norm_prev = grad_norm;
    
            /* reset of the current gradient */
            std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    //        std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
            ef.calculate_error_gradient( *params_current, *gradient_current );
    //        double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );
    
    //        for(size_t k = 0; k < gradient_mem.size(); ++k){
    //            printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
    //        }
    //        printf("---------------------\n");
    
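            /* Euclidean norm of the current gradient */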
            grad_norm = 0.0;
            for(auto v: *gradient_current){
                grad_norm += v * v;
            }
            grad_norm = std::sqrt(grad_norm);
    
            /* Update of the parameters */
            /* step length calculation */
        if(iter_idx < 10 || iter_idx % this->restart_frequency == 0 ){
            /* fixed step length (first iterations and periodic restarts) */
            gamma = 0.1 * this->tolerance;
        }
        else{
                /* angle between two consecutive gradients */
                sx = 0.0;
                for(i = 0; i < gradient_current->size(); ++i){
                    sx += (gradient_current->at( i ) * gradient_prev->at( i ));
                }
            sx /= grad_norm * grad_norm_prev;
            /* guard std::acos against rounding errors pushing the cosine outside [-1, 1] */
            if( sx >  1.0 ){ sx =  1.0; }
            if( sx < -1.0 ){ sx = -1.0; }
            beta = std::sqrt(std::acos( sx ) / PI);
    
                eval_step_size_mk( gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val );
            }
    
            for(i = 0; i < gradient_current->size(); ++i){
                (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
    
    //            (*params_prev)[i] *= 0.95;
    
            }
    
        /* switcheroo: swap the current and previous buffers instead of copying them */
            ptr_mem = gradient_prev;
            gradient_prev = gradient_current;
            gradient_current = ptr_mem;
    
            ptr_mem = params_prev;
            params_prev = params_current;
            params_current = ptr_mem;
    
            val = ef.eval( params_current );
        /* progress report; with a modulus of 1 this prints on every iteration */
        if(iter_idx % 1 == 0){
                printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r", (int)iter_idx, gamma, c, grad_norm, val);
                std::cout.flush();
            }
        }
        printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\n", (int)iter_idx, gamma, c, grad_norm, val);
        std::cout.flush();
    
    
        *this->optimal_parameters = *params_current;
    
        delete gradient_current;
        delete gradient_prev;
        delete params_current;
        delete params_prev;
    }
    
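    /**
     * Returns a pointer to the parameter vector found by the last call to optimize().
     */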
    std::vector<double>* GradientDescent::get_parameters() {
        return this->optimal_parameters;
    }
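
    /*
     * Usage sketch (assumption: an ErrorFunction implementation -- here a hypothetical
     * MSEError class built from some training data -- provides the get_dimension(),
     * get_parameters(), calculate_error_gradient() and eval() methods used above):
     *
     *     MSEError ef( data );                           // hypothetical error function
     *     GradientDescent gd( 1e-6, 50 );                // tolerance, restart frequency
     *     gd.optimize( ef );
     *     std::vector<double> *result = gd.get_parameters();
     */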