    /**
     * NormalizedGradientDescent.cpp
     *
     * Implementation of the normalized gradient descent optimization method.
     *
     * @author Michal Kravčenko
     * @date 30.7.18 -
     */
    
    #include <algorithm>
    #include <utility>

    #include <random.hpp>

    #include <mpi.h>
    
    
    #include "NormalizedGradientDescent.h"
    #include "message.h"
    
    
    namespace lib4neuro {
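        /**
         * @param epsilon   stopping tolerance on the error value
         * @param max_iters maximum number of iterations of the main loop
         * @param batch     batch size forwarded to the gradient evaluation
         */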
        NormalizedGradientDescent::NormalizedGradientDescent(
            double epsilon,
            int max_iters,
            size_t batch
        ) {
            this->tolerance         = epsilon;
            this->maximum_niters    = max_iters;
            this->batch             = batch;
        }
    
        NormalizedGradientDescent::~NormalizedGradientDescent() {
        }
    
    
    
        void NormalizedGradientDescent::optimize(lib4neuro::ErrorFunction& ef,
                                                 std::ofstream* ofs) {
    
            double err_ = ef.eval();
            COUT_INFO("Finding a solution via Normalized Gradient Descent method ..." << std::endl);
            COUT_INFO("Initial error: " << err_ << std::endl);
    
            if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
                *ofs << "Finding a solution via Normalized Gradient Descent method ..." << std::endl;
                *ofs << "Initial error: " << err_ << std::endl;
            }
    
            double        grad_norm    = this->tolerance * 10.0;
            double        grad_norm_prev;
            size_t        i;
            long long int iter_idx     = this->maximum_niters;
            size_t        iter_counter = 0;

            /* prev_val, grad_norm and grad_norm_prev are kept only for the
             * commented-out adaptive step-length experiments further below */
            double prev_val, val = 0.0;
    
            size_t n_parameters                 = ef.get_dimension();
    
    
            std::vector<double>* gradient_current = new std::vector<double>(n_parameters);
            std::vector<double>* gradient_prev    = new std::vector<double>(n_parameters);
            std::vector<double>* params_current   = new std::vector<double>(ef.get_parameters());
            std::vector<double>* params_prev      = new std::vector<double>(n_parameters);
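
            /* two buffers per quantity (current/previous): each iteration writes the
             * trial step into the *_prev buffers and then swaps the pointers, so no
             * per-iteration allocation or copying is needed */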
    
    
            std::fill(gradient_current->begin(),
                      gradient_current->end(),
                      0.0);
            std::fill(gradient_prev->begin(),
                      gradient_prev->end(),
                      0.0);
    
            val = err_;
            prev_val = err_;
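
            /* wall-clock timing idiom: start from -MPI_Wtime() and add MPI_Wtime()
             * after the loop, so total_time ends up holding the elapsed seconds */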
            double total_time = -MPI_Wtime();
            double cooling    = 1.0; // step-length multiplier, only changed by the commented-out schedule below
            while (val > this->tolerance && (iter_idx != 0)) {
                iter_idx--;
                iter_counter++;
                grad_norm_prev = grad_norm;
    
                /* reset of the current gradient */
                std::fill(gradient_current->begin(),
                          gradient_current->end(),
                          0.0);
                ef.calculate_error_gradient_normalized(*params_current,
                                                       *gradient_current,
                                                       this->batch);
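
                /* calculate_error_gradient_normalized presumably returns the unit
                 * descent direction g/||g|| (hence "normalized"), so the update below,
                 *   w_{k+1} = w_k - eta * g_k / ||g_k||,
                 * takes a step whose length is governed entirely by `scaling` (eta)
                 * rather than by the raw gradient magnitude */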
    
    
                /* Update of the parameters */
                double scaling = 0.00001 * cooling;
                // double scaling = 0.001 * val / err_; // alternative, error-proportional step length
                for (i = 0; i < gradient_current->size(); ++i) {
                    (*params_prev)[i] = (*params_current)[i] - scaling * (*gradient_current)[i];
                }
                prev_val = val;
                val = ef.eval(params_prev);
    
                /* swap the current and previous buffers; swapping the pointers avoids
                 * copying the vectors */
                std::swap(gradient_prev, gradient_current);
                std::swap(params_prev, params_current);
    
                // grad_norm *= scaling;

                COUT_INFO(" iteration " << iter_counter << ", step length " << scaling << ", error " << val);
    
                // adaptive cooling schedule (disabled):
                // if (prev_val < val) {
                //     cooling *= 0.7;  // the error grew -> shorten the step
                // } else {
                //     cooling *= 1.05; // the error shrank -> lengthen the step
                // }
                //
                // if (iter_counter % 500 == 0) {
                //     cooling = 1;     // periodic reset
                // }
            }
    
            if (iter_idx == 0) {
                COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters
                                    << ") was reached! Final error: " << val << std::endl);
    
                if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
                    *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: "
                         << val << std::endl;
    
                }
    
            } else {
                COUT_INFO(std::endl << "Gradient Descent method converged after "
                                    << this->maximum_niters - iter_idx
                                    << " iterations. Final error:" << val
                                    << std::endl);
    #ifdef L4N_DEBUG
                if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
                    *ofs << "Gradient Descent method converged after "
                         << this->maximum_niters - iter_idx
                         << " iterations."
                         << std::endl;
                }
    #endif
            }
            total_time += MPI_Wtime();
    
            this->optimal_parameters = *params_current;
            ef.set_parameters(this->optimal_parameters);
    
            delete gradient_current;
            delete gradient_prev;
            delete params_current;
            delete params_prev;
    
    		COUT_INFO( "Finished in " << total_time << " [s], in " << iter_counter << " iterations" );
    		COUT_INFO( " " << total_time / iter_counter << " [s] per iteration" );
    		COUT_INFO( " error " << val );
    
    
        }
    }
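
    /*
     * Minimal usage sketch (illustrative only, not part of the library): it assumes
     * an already constructed lib4neuro::ErrorFunction instance `ef` (e.g. an MSE
     * over a network and a data set), whose setup is outside the scope of this
     * optimizer. After optimize() returns, `ef` already holds the optimized
     * parameters, see the ef.set_parameters() call above.
     *
     *     lib4neuro::NormalizedGradientDescent optimizer(1e-6,   // tolerance on the error value
     *                                                    100000, // maximum number of iterations
     *                                                    0);     // batch size forwarded to the gradient evaluation
     *     optimizer.optimize(ef, nullptr); // nullptr: no log-file stream
     */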