/**
 * DESCRIPTION OF THE FILE
 *
 * @author Michal Kravčenko
 *
 * @date 30.7.18 -
 */

#include <algorithm>
#include <cmath>
#include <ctime>

#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>

#include "GradientDescent.h"

#include "../mpi_wrapper.h"
    
    namespace lib4neuro {
    
        GradientDescent::GradientDescent(double epsilon,
                                         size_t n_to_restart,
                                         int max_iters,
                                         size_t batch) {
    
        this->tolerance         = epsilon;
        this->restart_frequency = n_to_restart;
        this->maximum_niters    = max_iters;
        this->batch             = batch;
    }

    GradientDescent::~GradientDescent() {}

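    /**
     * Heuristic update of the step length: the auxiliary coefficient c is
     * decreased when the error grew (fi > fim) and increased when it shrank,
     * and gamma is then rescaled from c, the angle-derived coefficient beta
     * and the ratio of the previous and current gradient norms.
     */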
    void GradientDescent::eval_step_size_mk(double& gamma,
                                            double beta,
                                            double& c,
                                            double grad_norm_prev,
                                            double grad_norm,
                                            double fi,
                                            double fim) {
    
            if (fi > fim) {
                c /= 1.0000005;
            } else if (fi < fim) {
                c *= 1.0000005;
            }
    
    
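        /*
         * Step-length update used below:
         *   gamma_new = gamma * c^(1 - 2*beta)
         *                     * (grad_norm_prev / grad_norm)^(1 / c)
         * i.e. the step grows when consecutive gradients are nearly aligned
         * (small beta) and the gradient norm is decreasing, and is damped
         * otherwise.
         */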
        gamma *= std::pow(c, 1.0 - 2.0 * beta)
                 * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
    }
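
    /**
     * Tries to decrease the error below error_previous by moving a single,
     * randomly chosen parameter along the supplied descent direction,
     * shrinking the step coefficient whenever the error does not improve.
     * If the step coefficient becomes negligible, a full step over all
     * parameters is taken instead and false is returned.
     */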
    
    bool GradientDescent::perform_feasible_1D_step(
        lib4neuro::ErrorFunction& ef,
        double error_previous,
        double step_coefficient,
        std::shared_ptr<std::vector<double>> direction,
        std::shared_ptr<std::vector<double>> parameters_before,
        std::shared_ptr<std::vector<double>> parameters_after
    ) {
    
        boost::random::mt19937                    gen(std::time(0));
        /* uniform_int_distribution uses a closed range, so the upper bound
           must be size() - 1 to yield a valid index */
        boost::random::uniform_int_distribution<> dis(0,
                                                      direction->size() - 1);
        size_t                                    max_dir_idx = dis(gen);
    
    
            double error_current = error_previous + 1.0;
    
        while (error_current >= error_previous) {
            (*parameters_after)[max_dir_idx] =
                (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx];

            error_current = ef.eval(parameters_after.get());
            if (step_coefficient < 1e-32) {
                /* the step became negligible: fall back to a full step over
                   all parameters and report failure */
                for (size_t i = 0; i < direction->size(); ++i) {
                    (*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i];
                }
                return false;
            } else {
                if (error_current >= error_previous) {
                    /* no improvement yet: shrink the step and try again */
                    step_coefficient *= 0.5;
                } else {
                    /* improvement found: the loop condition terminates */
                }
            }
        }

        return true;
    }

        void GradientDescent::optimize(lib4neuro::ErrorFunction& ef,
                                       std::ofstream* ofs) {
    
            double err_ = ef.eval();
    
            COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
    
            COUT_INFO("Initial error: " << err_ << std::endl);
    
        if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
            *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
            *ofs << "Initial error: " << err_ << std::endl;
        }

            double        grad_norm    = this->tolerance * 10.0, gamma, sx, beta;
            double        grad_norm_prev;
            size_t        i;
            long long int iter_idx     = this->maximum_niters;
            size_t        iter_counter = 0;
    
            gamma                = 1.0;
    
            double prev_val, val = 0.0, c = 1.25;
    
            size_t n_parameters                 = ef.get_dimension();
    
            std::vector<double>* gradient_current(new std::vector<double>(n_parameters));
            std::vector<double>* gradient_prev(new std::vector<double>(n_parameters));
    
            std::vector<double>* params_current = new std::vector<double>(ef.get_parameters());
    
            std::vector<double>* params_prev(new std::vector<double>(n_parameters));
            std::vector<double>* ptr_mem;
    
            std::fill(gradient_current->begin(),
                      gradient_current->end(),
                      0.0);
            std::fill(gradient_prev->begin(),
                      gradient_prev->end(),
                      0.0);
    
            val = ef.eval(params_current);
    
            size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0;
    
            double cooling              = 1.0;
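        /* main loop: iterate until the gradient norm drops below the tolerance
           or the iteration budget (maximum_niters) is exhausted */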
    
            while (grad_norm > this->tolerance && (iter_idx != 0)) {
                iter_idx--;
                iter_counter++;
    
                prev_val       = val;
    
                grad_norm_prev = grad_norm;
    
                /* reset of the current gradient */
    
                std::fill(gradient_current->begin(),
                          gradient_current->end(),
                          0.0);
                ef.calculate_error_gradient(*params_current,
                                            *gradient_current,
                                            1.0,
                                            this->batch);
    
                grad_norm = 0.0;
                for (auto v: *gradient_current) {
                    grad_norm += v * v;
                }
                grad_norm = std::sqrt(grad_norm);
    
                /* Update of the parameters */
                /* step length calculation */
    
                if (iter_counter < 10 || iter_counter % this->restart_frequency == 0) {
    
                    /* fixed step length */
    
                    gamma   = 0.1 * this->tolerance;
    
                } else {
                    /* angle between two consecutive gradients */
    
                    sx     = 0.0;
    
                    for (i = 0; i < gradient_current->size(); ++i) {
                        sx += (gradient_current->at(i) * gradient_prev->at(i));
                    }
                    sx /= grad_norm * grad_norm_prev;
    
                if (sx < -1.0 + 5e-12) {
                    /* clamp sx to the domain of std::acos */
                    sx = -1.0 + 5e-12;
                } else if (sx > 1.0 - 5e-12) {
                    sx = 1.0 - 5e-12;
                }

                beta = std::sqrt(std::acos(sx) / lib4neuro::PI);
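                /* beta lies in [0, 1]: close to 0 when consecutive gradients
                   point in nearly the same direction, close to 1 when they are
                   nearly opposite */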
    
                eval_step_size_mk(gamma,
                                  beta,
                                  c,
                                  grad_norm_prev,
                                  grad_norm,
                                  val,
                                  prev_val);
            }

            for (i = 0; i < gradient_current->size(); ++i) {
                (*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i];
            }

            val = ef.eval(params_prev);
    
                /* switcheroo */
    
                ptr_mem          = gradient_prev;
                gradient_prev    = gradient_current;
    
                gradient_current = ptr_mem;
    
    
                ptr_mem        = params_prev;
                params_prev    = params_current;
    
                params_current = ptr_mem;
    
            COUT_DEBUG(std::string("Iteration: ") << (unsigned int) (iter_counter)
                                                  << ". C: " << c
                                                  << ". Gradient norm: " << grad_norm
                                                  << ". Total error: " << val
                                                  << ".\r");
    
            WRITE_TO_OFS_DEBUG(ofs,
                               "Iteration: " << (unsigned int) (iter_counter)
                                             << ". Step size: " << gamma * cooling
                                             << ". C: " << c
                                             << ". Gradient norm: " << grad_norm
                                             << ". Total error: " << val
                                             << "." << std::endl);
        }

            COUT_DEBUG(std::string("Iteration: ") << (unsigned int) (iter_counter)
    
                                                  << ". Step size: " << gamma
                                                  << ". C: " << c
                                                  << ". Gradient norm: " << grad_norm
                                                  << ". Total error: " << val
                                                  << "." << std::endl);
    
            COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses << ", good: "
                                                 << counter_good_guesses << ", bad: " << counter_bad_guesses
                                                 << ", from which "
                                                 << counter_simplified_direction_good + counter_simplified_direction_bad
                                                 << " were attempted by simplified direction, success: "
                                                 << counter_simplified_direction_good << ", fail: "
                                                 << counter_simplified_direction_bad << std::endl << std::endl);
    
            if (iter_idx == 0) {
                COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters
                                    << ") was reached! Final error: " << val << std::endl);
    
    
            if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
                *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: "
                     << val << std::endl;
            }
        } else {
            COUT_INFO(std::endl << "Gradient Descent method converged after "
                                << this->maximum_niters - iter_idx
                                << " iterations. Final error: " << val
                                << std::endl);

            if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
                *ofs << "Gradient Descent method converged after "
                     << this->maximum_niters - iter_idx
                     << " iterations."
                     << std::endl;
            }
        }

            this->optimal_parameters = *params_current;
    
            ef.set_parameters(this->optimal_parameters);
    
        delete gradient_current;
        delete gradient_prev;
        delete params_current;
        delete params_prev;
    }
}
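
/*
 * Usage sketch (illustrative only; it assumes an ErrorFunction instance `ef`,
 * e.g. a mean-squared error over a network and data set, has been constructed
 * elsewhere, and the tolerance/restart/iteration/batch values are arbitrary):
 *
 *     lib4neuro::GradientDescent gd(1e-6, 200, 50000, 0);
 *     gd.optimize(ef, nullptr);
 *     // the optimized parameters are written back into ef via
 *     // ErrorFunction::set_parameters()
 */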