Michal Kravcenko authored
ADD: added Kalman filter learning method (not tested) and random learning method + modifications in neural network to support the new functionality
GradientDescent.cpp 12.69 KiB
/**
 * Gradient descent optimization with an adaptive step-length heuristic.
 *
 * @author Michal Kravčenko
 * @date 30.7.18
 */
#include <ctime>            /* std::time, used to seed the RNG below */
#include <cmath>            /* std::pow, std::sqrt, std::acos */
#include <boost/random.hpp> /* boost::random::mt19937, uniform_int_distribution */
#include <random.hpp>
#include "GradientDescent.h"
#include "message.h"
namespace lib4neuro {
GradientDescent::GradientDescent(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
this->tolerance = epsilon;
this->restart_frequency = n_to_restart;
this->optimal_parameters = new std::vector<double>(0);
this->maximum_niters = max_iters;
this->batch = batch;
}
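/*
 * Hypothetical usage sketch (the error-function object and its construction are assumptions,
 * not part of this file):
 *
 *   lib4neuro::GradientDescent gd(1e-6, 100, 10000, 0);
 *   gd.optimize(error_function);                      // error_function: some lib4neuro::ErrorFunction
 *   std::vector<double>* best = gd.get_parameters();  // parameters found by the last optimize() call
 */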
GradientDescent::~GradientDescent() {
if (this->optimal_parameters) {
delete this->optimal_parameters;
this->optimal_parameters = nullptr;
}
}
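/**
 * Adaptive step-length heuristic. The scaling factor c is decreased slightly when the error
 * grew (fi > fim) and increased slightly when it shrank, and the step length is then updated as
 *
 *   gamma <- gamma * c^(1 - 2*beta) * (||grad_prev|| / ||grad||)^(1/c)
 *
 * where beta encodes the angle between the two most recent gradients. This is the author's
 * empirical rule; the comment merely restates what the code below computes.
 */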
void GradientDescent::eval_step_size_mk(double &gamma,
double beta,
double &c,
double grad_norm_prev,
double grad_norm,
double fi,
double fim) {
if (fi > fim) {
c /= 1.0000005;
} else if (fi < fim) {
c *= 1.0000005;
}
gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
}
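/**
 * Fallback line search along a single, randomly chosen coordinate of the descent direction.
 * Only that coordinate is updated, with a step coefficient that is halved until the error
 * decreases; if the coefficient underflows (< 1e-32), a full step with the tiny coefficient
 * is taken instead and the method reports failure by returning false.
 */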
bool GradientDescent::perform_feasible_1D_step(
lib4neuro::ErrorFunction &ef,
double error_previous,
double step_coefficient,
std::vector<double> *direction,
std::vector<double> *parameters_before,
std::vector<double> *parameters_after
) {
size_t i;
boost::random::mt19937 gen(std::time(0));
/* uniform_int_distribution is inclusive on both bounds, so the last valid index is size() - 1 */
boost::random::uniform_int_distribution<> dis(0, direction->size() - 1);
size_t max_dir_idx = dis(gen);
double error_current = error_previous + 1.0;
while( error_current >= error_previous ){
(*parameters_after)[max_dir_idx] = (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx];
error_current = ef.eval( parameters_after );
if( step_coefficient < 1e-32){
// COUT_DEBUG(" Attempting to find a feasible direction in one dimension was NOT SUCCESSFUL" << std::endl);
for (i = 0; i < direction->size(); ++i) {
(*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i];
}
return false;
}
else{
if( error_current >= error_previous ){
// COUT_DEBUG(" Incorrect step size! Reducing it by a factor of 0.5. Errors: " << error_current << ", prev: " << error_previous << std::endl);
step_coefficient *= 0.5;
}
else{
// COUT_DEBUG(" Step OK" << std::endl);
}
}
}
return true;
}
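/**
 * Main optimization loop: steepest descent with an adaptive step length. Every iteration
 * evaluates the (possibly mini-batch) error gradient, rescales the step via eval_step_size_mk()
 * (with a periodic restart to a small fixed step), applies the update damped by a slowly
 * decaying "cooling" factor, and stops once the gradient norm drops below the tolerance or
 * the iteration budget is exhausted.
 */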
void GradientDescent::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {
/* Copy data set max and min values, if it's normalized */
if(ef.get_dataset()->is_normalized()) {
ef.get_network_instance()->set_normalization_strategy_instance(
ef.get_dataset()->get_normalization_strategy());
}
COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
COUT_INFO("Initial error: " << ef.eval() << std::endl);
if(ofs && ofs->is_open()) {
*ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
*ofs << "Initial error: " << ef.eval() << std::endl;
}
double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
double grad_norm_prev;
size_t i;
long long int iter_idx = this->maximum_niters;
size_t iter_counter = 0;
gamma = 1.0;
double prev_val, val = 0.0, c = 1.25;
size_t n_parameters = ef.get_dimension();
std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
std::vector<double> *params_current = ef.get_parameters();
std::vector<double> *params_prev = new std::vector<double>(n_parameters);
std::vector<double> *ptr_mem;
// std::vector<double> gradient_mem( n_parameters );
// std::vector<double> parameters_analytical( n_parameters );
std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
val = ef.eval(params_current);
double coeff = 1;
bool it_analyzed = false;
size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0;
double cooling = 1.0;
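/* iterate until the gradient norm falls below the tolerance or the iteration budget runs out
 * (iter_idx counts down from maximum_niters and the loop stops when it reaches zero) */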
while (grad_norm > this->tolerance && (iter_idx != 0)) {
iter_idx--;
iter_counter++;
prev_val = val;
grad_norm_prev = grad_norm;
/* reset of the current gradient */
std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
// std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
ef.calculate_error_gradient(*params_current, *gradient_current, 1.0, this->batch);
// double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );
// for(size_t k = 0; k < gradient_mem.size(); ++k){
// printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
// }
// printf("---------------------\n");
grad_norm = 0.0;
for (auto v: *gradient_current) {
grad_norm += v * v;
}
grad_norm = std::sqrt(grad_norm);
/* Update of the parameters */
/* step length calculation */
if (iter_counter < 10 || iter_counter % this->restart_frequency == 0 ) {
/* fixed step length */
gamma = 0.1 * this->tolerance;
// gamma = 1 / grad_norm;
// gamma = 1e-4;
cooling = 1.0;
} else {
/* angle between two consecutive gradients */
sx = 0.0;
for (i = 0; i < gradient_current->size(); ++i) {
sx += (gradient_current->at(i) * gradient_prev->at(i));
}
sx /= grad_norm * grad_norm_prev;
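/* clamp the cosine into (-1, 1) so that std::acos below cannot return NaN due to rounding */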
if( sx < -1.0 + 5e-12 ){
sx = -1 + 5e-12;
}
else if( sx > 1.0 - 5e-12 ){
sx = 1 - 5e-12;
}
beta = std::sqrt(std::acos(sx) / lib4neuro::PI);
eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
}
for (i = 0; i < gradient_current->size(); ++i) {
(*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i];
}
val = ef.eval(params_prev);
// val = prev_val + 1.0;
// coeff = 1;
// it_analyzed = false;
// while(val >= prev_val){
// for (i = 0; i < gradient_current->size(); ++i) {
// (*params_prev)[i] = (*params_current)[i] - coeff * gamma * (*gradient_current)[i];
// }
// val = ef.eval(params_prev);
//
//
// if( coeff < 1e-32){
//// COUT_DEBUG("Error, the advised gradient direction is not feasible. Attempting to find a feasible direction in one dimension" << std::endl);
// if( !this->perform_feasible_1D_step(ef, prev_val, gamma, gradient_current, params_current, params_prev) ){
// gamma = 1;
// counter_simplified_direction_bad++;
// }
// else{
// gamma = 1;
// counter_simplified_direction_good++;
// }
//
// break;
// }
// else{
// if( val >= prev_val ){
//// COUT_DEBUG("Incorrect step size! Reducing gamma. Errors: " << val << ", prev: " << prev_val << std::endl);
// coeff *= 0.5;
//
// if( !it_analyzed ){
// counter_bad_guesses++;
// }
// }
// else{
//// COUT_DEBUG("Step OK" << std::endl);
// if( !it_analyzed ){
// counter_good_guesses++;
// }
// }
// }
// it_analyzed = true;
// }
// gamma *= coeff;
/* swap the "previous" and "current" gradient and parameter buffers instead of copying them */
ptr_mem = gradient_prev;
gradient_prev = gradient_current;
gradient_current = ptr_mem;
ptr_mem = params_prev;
params_prev = params_current;
params_current = ptr_mem;
COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
<< ". Step size: " << gamma * cooling
<< ". C: " << c
<< ". Gradient norm: " << grad_norm
<< ". Total error: " << val
<< ".\r" );
WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter)
<< ". Step size: " << gamma * cooling
<< ". C: " << c
<< ". Gradient norm: " << grad_norm
<< ". Total error: " << val
<< "." << std::endl);
// if(iter_counter % 100 == 0){
// COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter)
// << ". Step size: " << gamma
// << ". C: " << c
// << ". Gradient norm: " << grad_norm
// << ". Total error: " << val
// << ".\r");
// }
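/* gradually shrink the effective step length; "cooling" multiplies gamma in the parameter update above */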
cooling *= 0.9999;
}
COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
<< ". Step size: " << gamma
<< ". C: " << c
<< ". Gradient norm: " << grad_norm
<< ". Total error: " << val
<< "." << std::endl);
COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses << ", good: " << counter_good_guesses << ", bad: " << counter_bad_guesses << ", from which " << counter_simplified_direction_good + counter_simplified_direction_bad << " were attempted by simplified direction, success: " << counter_simplified_direction_good << ", fail: " << counter_simplified_direction_bad << std::endl << std::endl );
if(iter_idx == 0) {
COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl);
if(ofs && ofs->is_open()) {
*ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl;
}
} else {
COUT_INFO(std::endl << "Gradient Descent method converged after "
<< this->maximum_niters - iter_idx
<< " iterations. Final error: " << val
<< std::endl);
#ifdef L4N_DEBUG
if(ofs && ofs->is_open()) {
*ofs << "Gradient Descent method converged after "
<< this->maximum_niters-iter_idx
<< " iterations."
<< std::endl;
}
#endif
}
*this->optimal_parameters = *params_current;
ef.get_network_instance()->copy_parameter_space( this->optimal_parameters );
delete gradient_current;
delete gradient_prev;
delete params_current;
delete params_prev;
}
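/**
 * Returns the internally stored vector of optimal parameters found by the last call to optimize().
 */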
std::vector<double> *GradientDescent::get_parameters() {
return this->optimal_parameters;
}
}