Skip to content
Snippets Groups Projects
GradientDescent.cpp 12.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • /**
     * DESCRIPTION OF THE FILE
     *
     * @author Michal Kravčenko
     * @date 30.7.18 - 
     */
    
    
    #include "GradientDescent.h"
    
    namespace lib4neuro {
    
        GradientDescent::GradientDescent(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
    
            this->tolerance = epsilon;
            this->restart_frequency = n_to_restart;
            this->optimal_parameters = new std::vector<double>(0);
    
            this->maximum_niters = max_iters;
    
        GradientDescent::~GradientDescent() {
            if (this->optimal_parameters) {
                delete this->optimal_parameters;
                this->optimal_parameters = nullptr;
            }
    
        void GradientDescent::eval_step_size_mk(double &gamma,
                                                double beta,
                                                double &c,
                                                double grad_norm_prev,
                                                double grad_norm,
                                                double fi,
                                                double fim) {
    
            if (fi > fim) {
                c /= 1.0000005;
            } else if (fi < fim) {
                c *= 1.0000005;
            }
    
            gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
    
        bool GradientDescent::perform_feasible_1D_step(
                lib4neuro::ErrorFunction &ef,
                double error_previous,
                double step_coefficient,
                std::vector<double> *direction,
                std::vector<double> *parameters_before,
                std::vector<double> *parameters_after
                ) {
    
            size_t i;
    
            boost::random::mt19937 gen(std::time(0));
            boost::random::uniform_int_distribution<> dis(0, direction->size());
            size_t  max_dir_idx = dis(gen);
    
            double error_current = error_previous + 1.0;
            while( error_current >=  error_previous ){
                (*parameters_after)[max_dir_idx] = (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx];
    
                error_current = ef.eval( parameters_after );
                if( step_coefficient < 1e-32){
    //                COUT_DEBUG("    Attempting to find a feasible direction in one dimension was NOT SUCCESSFUL" << std::endl);
                    for (i = 0; i < direction->size(); ++i) {
                        (*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i];
                    }
                    return false;
                }
                else{
                    if( error_current >=  error_previous ){
    //                    COUT_DEBUG("    Incorrect step size! Reducing it by a factor of 0.5. Errors: " << error_current << ", prev: " << error_previous << std::endl);
                        step_coefficient *= 0.5;
                    }
                    else{
    //                    COUT_DEBUG("    Step OK" << std::endl);
                    }
                }
            }
            return true;
        }
    
    
        void GradientDescent::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {
    
            /* Copy data set max and min values, if it's normalized */
    
            if(ef.get_dataset()->is_normalized()) {
    
                ef.get_network_instance()->set_normalization_strategy_instance(
                        ef.get_dataset()->get_normalization_strategy());
            }
    
    
            COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
    
            COUT_INFO("Initial error: " << ef.eval() << std::endl);
    
            if(ofs && ofs->is_open()) {
                *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
    
                *ofs << "Initial error: " << ef.eval() << std::endl;
    
            double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
            double grad_norm_prev;
    
            size_t i;
            long long int iter_idx = this->maximum_niters;
            size_t iter_counter = 0;
    
            gamma = 1.0;
            double prev_val, val = 0.0, c = 1.25;
    
            size_t n_parameters = ef.get_dimension();
    
    
            std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
            std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
            std::vector<double> *params_current = ef.get_parameters();
            std::vector<double> *params_prev = new std::vector<double>(n_parameters);
            std::vector<double> *ptr_mem;
    
    
    //    std::vector<double> gradient_mem( n_parameters );
    //    std::vector<double> parameters_analytical( n_parameters );
    
    
    
            std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
            std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
    
    
            val = ef.eval(params_current);
            double coeff = 1;
            bool it_analyzed = false;
            size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0;
            double cooling = 1.0;
    
            while (grad_norm > this->tolerance && (iter_idx != 0)) {
                iter_idx--;
                iter_counter++;
    
                prev_val = val;
                grad_norm_prev = grad_norm;
    
                /* reset of the current gradient */
                std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    
    //        std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
    
                ef.calculate_error_gradient(*params_current, *gradient_current, 1.0, this->batch);
    
    //        double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );
    
    //        for(size_t k = 0; k < gradient_mem.size(); ++k){
    //            printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
    //        }
    //        printf("---------------------\n");
    
    
                grad_norm = 0.0;
                for (auto v: *gradient_current) {
                    grad_norm += v * v;
                }
                grad_norm = std::sqrt(grad_norm);
    
                /* Update of the parameters */
                /* step length calculation */
    
                if (iter_counter < 10 || iter_counter % this->restart_frequency == 0 ) {
    
                    /* fixed step length */
                    gamma = 0.1 * this->tolerance;
    
    //                gamma = 1 / grad_norm;
    //                gamma = 1e-4;
                    cooling = 1.0;
    
                } else {
                    /* angle between two consecutive gradients */
                    sx = 0.0;
                    for (i = 0; i < gradient_current->size(); ++i) {
                        sx += (gradient_current->at(i) * gradient_prev->at(i));
                    }
                    sx /= grad_norm * grad_norm_prev;
    
                    if( sx < -1.0 + 5e-12 ){
                        sx = -1 + 5e-12;
                    }
                    else if( sx > 1.0 - 5e-12 ){
                        sx = 1 - 5e-12;
                    }
    
                    beta = std::sqrt(std::acos(sx) / lib4neuro::PI);
    
                    eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
                }
    
                for (i = 0; i < gradient_current->size(); ++i) {
    
                    (*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i];
    
                val = ef.eval(params_prev);
    
    //            val = prev_val + 1.0;
    //            coeff = 1;
    //            it_analyzed = false;
    //            while(val >= prev_val){
    //                for (i = 0; i < gradient_current->size(); ++i) {
    //                    (*params_prev)[i] = (*params_current)[i] - coeff * gamma * (*gradient_current)[i];
    //                }
    //                val = ef.eval(params_prev);
    //
    //
    //                if( coeff < 1e-32){
    ////                    COUT_DEBUG("Error, the advised gradient direction is not feasible. Attempting to find a feasible direction in one dimension" << std::endl);
    //                    if( !this->perform_feasible_1D_step(ef, prev_val, gamma, gradient_current, params_current, params_prev) ){
    //                        gamma = 1;
    //                        counter_simplified_direction_bad++;
    //                    }
    //                    else{
    //                        gamma = 1;
    //                        counter_simplified_direction_good++;
    //                    }
    //
    //                    break;
    //                }
    //                else{
    //                    if( val >=  prev_val ){
    ////                        COUT_DEBUG("Incorrect step size! Reducing gamma. Errors: " << val << ", prev: " << prev_val << std::endl);
    //                        coeff *= 0.5;
    //
    //                        if( !it_analyzed ){
    //                            counter_bad_guesses++;
    //                        }
    //                    }
    //                    else{
    ////                        COUT_DEBUG("Step OK" << std::endl);
    //                        if( !it_analyzed ){
    //                            counter_good_guesses++;
    //                        }
    //                    }
    //                }
    //                it_analyzed = true;
    //            }
    //            gamma *= coeff;
    
    
    
                /* switcheroo */
                ptr_mem = gradient_prev;
                gradient_prev = gradient_current;
                gradient_current = ptr_mem;
    
                ptr_mem = params_prev;
                params_prev = params_current;
                params_current = ptr_mem;
    
    
                COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
    
                                                      << ". C: " << c
                                                      << ". Gradient norm: " << grad_norm
                                                      << ". Total error: " << val
    
    
                WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter)
    
                                                      << ". C: " << c
                                                      << ". Gradient norm: " << grad_norm
                                                      << ". Total error: " << val
                                                      << "." << std::endl);
    
    //            if(iter_counter % 100 == 0){
    //                COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter)
    //                                                      << ". Step size: " << gamma
    //                                                      << ". C: " << c
    //                                                      << ". Gradient norm: " << grad_norm
    //                                                      << ". Total error: " << val
    //                                                      << ".\r");
    //            }
    
                cooling *= 0.9999;
    
            COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
                                                  << ". Step size: " << gamma
                                                  << ". C: " << c
                                                  << ". Gradient norm: " << grad_norm
                                                  << ". Total error: " << val
                                                  << "." << std::endl);
    
            COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses << ", good: " << counter_good_guesses << ", bad: " << counter_bad_guesses << ", from which " << counter_simplified_direction_good + counter_simplified_direction_bad << " were attempted by simplified direction, success: " << counter_simplified_direction_good << ", fail: " << counter_simplified_direction_bad << std::endl << std::endl );
    
                COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl);
    
                    *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl;
    
                COUT_INFO(std::endl << "Gradient Descent method converged after "
                                  << this->maximum_niters - iter_idx
                                  << " iterations. Final error:" << val
    
    #ifdef L4N_DEBUG
                if(ofs && ofs->is_open()) {
                    *ofs << "Gradient Descent method converged after "
    
                         << this->maximum_niters-iter_idx
    
                         << " iterations."
    
            *this->optimal_parameters = *params_current;
    
            delete gradient_current;
            delete gradient_prev;
            delete params_current;
            delete params_prev;
    
        std::vector<double> *GradientDescent::get_parameters() {
            return this->optimal_parameters;
        }