/**
 * Gradient descent optimizer with an adaptive step-length strategy.
 *
 * @author Michal Kravčenko
 * @date 30.7.18 -
 */
#include "GradientDescent.h"

#include <cmath>
#include <cstdio>
#include <iostream>
#include <algorithm>
#include <vector>
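
/*
 * Typical usage (sketch only; assumes an already constructed lib4neuro::ErrorFunction instance `ef`,
 * the constructor arguments are illustrative values):
 *
 *     GradientDescent gd(1e-6, 200, 1000);   // tolerance, restart frequency, max. number of iterations
 *     gd.optimize(ef);                       // runs gradient descent with the adaptive step length
 *     std::vector<double> *best = gd.get_parameters();
 */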

GradientDescent::GradientDescent(double epsilon, size_t n_to_restart, long long int max_iters) {
    this->tolerance = epsilon;
    this->restart_frequency = n_to_restart;
    this->optimal_parameters = new std::vector<double>(0);
    this->maximum_niters = max_iters;

}
GradientDescent::~GradientDescent() {
    if (this->optimal_parameters) {
        delete this->optimal_parameters;
        this->optimal_parameters = nullptr;
    }

}
void GradientDescent::eval_step_size_mk(double &gamma, double beta, double &c, double grad_norm_prev,
                                        double grad_norm, double fi, double fim) {
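    /*
     * Step-length heuristic:
     *  - c is nudged down when the error increased (fi > fim) and up when it decreased;
     *  - gamma is rescaled by c^(1 - 2*beta), where beta encodes the angle between the two
     *    most recent gradients, and by (grad_norm_prev / grad_norm)^(1/c), which enlarges
     *    the step while the gradient norm keeps shrinking.
     */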

    if (fi > fim) {
        c /= 1.0000005;
    } else if (fi < fim) {
        c *= 1.0000005;
    }

    gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
}

void GradientDescent::optimize(lib4neuro::ErrorFunction &ef) {

    /* Copy the data set's normalization strategy to the network, if the data set is normalized */
    if (!std::isnan(ef.get_dataset()->get_max_inp_val())) {
        ef.get_network_instance()->set_normalization_strategy_instance(
                ef.get_dataset()->get_normalization_strategy());
    }

    std::cout << "Finding a solution via a Gradient Descent method with adaptive step-length" << std::endl;
    std::cout
            << "********************************************************************************************************************************************"
            << std::endl;
    double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
    double grad_norm_prev;
    size_t i;
    long long int iter_idx = this->maximum_niters;
    size_t iter_counter = 0;

    gamma = 1.0;
    double prev_val, val = 0.0, c = 1.25;

    /* number of optimized parameters (dimensionality of the error function) */
    size_t n_parameters = ef.get_dimensionality();

    std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
    std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
    std::vector<double> *params_current = ef.get_parameters();
    std::vector<double> *params_prev = new std::vector<double>(n_parameters);
    std::vector<double> *ptr_mem;

//    std::vector<double> gradient_mem( n_parameters );
//    std::vector<double> parameters_analytical( n_parameters );

    std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
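
    /*
     * Main iteration: in every pass the gradient at the current parameters is evaluated,
     * its norm is computed, a step length is chosen (fixed during the first iterations and
     * at every restart, adaptive otherwise), the parameters are moved against the gradient,
     * and the "current"/"previous" buffers are swapped for the next pass.
     */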
    while (grad_norm > this->tolerance && (iter_idx != 0)) {
        iter_idx--;
        iter_counter++;
        prev_val = val;
        grad_norm_prev = grad_norm;

        /* reset of the current gradient */
        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);

//        std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
        ef.calculate_error_gradient(*params_current, *gradient_current);
        /* error value at the current parameters, used by the step-length heuristic */
        val = ef.eval(params_current);

//        double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );
//        for(size_t k = 0; k < gradient_mem.size(); ++k){
//            printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
//        }
//        printf("---------------------\n");
        grad_norm = 0.0;
        for (auto v: *gradient_current) {
            grad_norm += v * v;
        }
        grad_norm = std::sqrt(grad_norm);
        /* Update of the parameters */
        /* step length calculation */
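        /*
         * During the first 10 iterations and whenever iter_counter reaches a multiple of
         * restart_frequency, a conservative fixed step of 0.1 * tolerance is used (a restart);
         * otherwise the step is adapted from the angle between the last two gradients and
         * from the change of the error value.
         */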
        if (iter_counter < 10 || iter_counter % this->restart_frequency == 0) {
            /* fixed step length */
            gamma = 0.1 * this->tolerance;
        } else {
            /* angle between two consecutive gradients */
            sx = 0.0;
            for (i = 0; i < gradient_current->size(); ++i) {
                sx += (gradient_current->at(i) * gradient_prev->at(i));
            }
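            /* after the normalization below, sx is the cosine of the angle between the current
             * and the previous gradient; beta = sqrt(angle / pi) maps that angle into [0, 1] */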
            sx /= grad_norm * grad_norm_prev;
            beta = std::sqrt(std::acos(sx) / lib4neuro::PI);

            eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
        }
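
        /* gradient step: the updated parameters are written into params_prev and become
         * the current parameters after the pointer swap below */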

        for (i = 0; i < gradient_current->size(); ++i) {
            (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];

        }
        /* switcheroo: swap the "current" and "previous" buffers for the next iteration */
        ptr_mem = gradient_prev;
        gradient_prev = gradient_current;
        gradient_current = ptr_mem;

        ptr_mem = params_prev;
        params_prev = params_current;
        params_current = ptr_mem;

        if (iter_counter % 1 == 0) {
            printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r",
                   (int) iter_counter, gamma, c, grad_norm, val);

        }
    }

    printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\n",
           (int) iter_counter, gamma, c, grad_norm, val);

    *this->optimal_parameters = *params_current;

    delete gradient_current;
    delete gradient_prev;
    delete params_current;
    delete params_prev;

}

std::vector<double> *GradientDescent::get_parameters() {
    return this->optimal_parameters;
}