GradientDescent.cpp

/**
 * DESCRIPTION OF THE FILE
 *
 * @author Michal Kravčenko
 * @date 30.7.18 - 
 */

#include <random.hpp>
#include "GradientDescent.h"
#include "message.h"

namespace lib4neuro {
    GradientDescent::GradientDescent(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
        this->tolerance = epsilon;
        this->restart_frequency = n_to_restart;
        this->maximum_niters = max_iters;
        this->batch = batch;
    }

    GradientDescent::~GradientDescent() {
    }

    void GradientDescent::eval_step_size_mk(double &gamma,
                                            double beta,
                                            double &c,
                                            double grad_norm_prev,
                                            double grad_norm,
                                            double fi,
                                            double fim) {

        if (fi > fim) {
            c /= 1.0000005;
        } else if (fi < fim) {
            c *= 1.0000005;
        }

        gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);

    }

    bool GradientDescent::perform_feasible_1D_step(
            lib4neuro::ErrorFunction &ef,
            double error_previous,
            double step_coefficient,
            std::shared_ptr<std::vector<double>> direction,
            std::shared_ptr<std::vector<double>> parameters_before,
            std::shared_ptr<std::vector<double>> parameters_after
            ) {

        size_t i;

        boost::random::mt19937 gen(std::time(0));
        boost::random::uniform_int_distribution<> dis(0, direction->size());
        size_t  max_dir_idx = dis(gen);

        double error_current = error_previous + 1.0;
        while( error_current >=  error_previous ){
            (*parameters_after)[max_dir_idx] = (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx];

            error_current = ef.eval( parameters_after.get() );
            if( step_coefficient < 1e-32){
                for (i = 0; i < direction->size(); ++i) {
                    (*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i];
                }
                return false;
            }
            else{
                if( error_current >=  error_previous ){
                    step_coefficient *= 0.5;
                }
                else{
                }
            }
        }
        return true;
    }

    void GradientDescent::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {

        /* Copy data set max and min values, if it's normalized */
        if(ef.get_dataset()->is_normalized()) {
            ef.get_network_instance()->set_normalization_strategy_instance(
                    ef.get_dataset()->get_normalization_strategy());
        }

        COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
        COUT_INFO("Initial error: " << ef.eval() << std::endl);

        if(ofs && ofs->is_open()) {
            *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
            *ofs << "Initial error: " << ef.eval() << std::endl;
        }

        double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
        double grad_norm_prev;
        size_t i;
        long long int iter_idx = this->maximum_niters;
        size_t iter_counter = 0;

        gamma = 1.0;
        double prev_val, val = 0.0, c = 1.25;

        size_t n_parameters = ef.get_dimension();


        std::vector<double>* gradient_current(new std::vector<double>(n_parameters));
        std::vector<double>* gradient_prev(new std::vector<double>(n_parameters));
        std::vector<double>* params_current = new std::vector<double>(ef.get_parameters());
        std::vector<double>* params_prev(new std::vector<double>(n_parameters));
        std::vector<double>* ptr_mem;


        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
        std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);

        val = ef.eval(params_current);
        double coeff = 1;
        bool it_analyzed = false;
        size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0;
        double cooling = 1.0;
        while (grad_norm > this->tolerance && (iter_idx != 0)) {
            iter_idx--;
            iter_counter++;
            prev_val = val;
            grad_norm_prev = grad_norm;

            /* reset of the current gradient */
            std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
            ef.calculate_error_gradient(*params_current, *gradient_current, 1.0, this->batch);


            grad_norm = 0.0;
            for (auto v: *gradient_current) {
                grad_norm += v * v;
            }
            grad_norm = std::sqrt(grad_norm);

            /* Update of the parameters */
            /* step length calculation */
            if (iter_counter < 10 || iter_counter % this->restart_frequency == 0 ) {
                /* fixed step length */
                gamma = 0.1 * this->tolerance;
                cooling = 1.0;
            } else {
                /* angle between two consecutive gradients */
                sx = 0.0;
                for (i = 0; i < gradient_current->size(); ++i) {
                    sx += (gradient_current->at(i) * gradient_prev->at(i));
                }
                sx /= grad_norm * grad_norm_prev;
                if( sx < -1.0 + 5e-12 ){
                    sx = -1 + 5e-12;
                }
                else if( sx > 1.0 - 5e-12 ){
                    sx = 1 - 5e-12;
                }
                beta = std::sqrt(std::acos(sx) / lib4neuro::PI);

                eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
            }

            for (i = 0; i < gradient_current->size(); ++i) {
                (*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i];
            }
            val = ef.eval(params_prev);


            /* switcheroo */
            ptr_mem = gradient_prev;
            gradient_prev = gradient_current;
            gradient_current = ptr_mem;

            ptr_mem = params_prev;
            params_prev = params_current;
            params_current = ptr_mem;


            COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
                                                  << ". Step size: " << gamma * cooling
                                                  << ". C: " << c
                                                  << ". Gradient norm: " << grad_norm
                                                  << ". Total error: " << val
                                                  << ".\r" );

            WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter)
                                                  << ". Step size: " << gamma * cooling
                                                  << ". C: " << c
                                                  << ". Gradient norm: " << grad_norm
                                                  << ". Total error: " << val
                                                  << "." << std::endl);


            cooling *= 0.9999;

        }
        COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
                                              << ". Step size: " << gamma
                                              << ". C: " << c
                                              << ". Gradient norm: " << grad_norm
                                              << ". Total error: " << val
                                              << "." << std::endl);
        COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses << ", good: " << counter_good_guesses << ", bad: " << counter_bad_guesses << ", from which " << counter_simplified_direction_good + counter_simplified_direction_bad << " were attempted by simplified direction, success: " << counter_simplified_direction_good << ", fail: " << counter_simplified_direction_bad << std::endl << std::endl );

        if(iter_idx == 0) {
            COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl);

            if(ofs && ofs->is_open()) {
                *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl;

            }

        } else {
            COUT_INFO(std::endl << "Gradient Descent method converged after "
                              << this->maximum_niters - iter_idx
                              << " iterations. Final error:" << val
                              << std::endl);
#ifdef L4N_DEBUG
            if(ofs && ofs->is_open()) {
                *ofs << "Gradient Descent method converged after "
                     << this->maximum_niters-iter_idx
                     << " iterations."
                     << std::endl;
            }
#endif
        }

        this->optimal_parameters = *params_current;
        ef.get_network_instance()->copy_parameter_space( &this->optimal_parameters );

    }
}