/**
* Gradient descent optimizer with a Barzilai-Borwein-type adaptive step length.
*
* @author Michal Kravčenko
* @date 4.2.19 -
*/
#include "GradientDescentBB.h"
#include "message.h"
namespace lib4neuro {
GradientDescentBB::GradientDescentBB(double epsilon,
size_t n_to_restart,
int max_iters,
size_t batch) {

this->tolerance = epsilon;
this->restart_frequency = n_to_restart;
this->maximum_niters = max_iters;
this->batch = batch;

}
GradientDescentBB::~GradientDescentBB() {
}
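/*
 * Rough usage sketch (illustrative only; the MSE error function, network and
 * data set below are assumptions and are not defined in this file):
 *
 *   lib4neuro::MSE mse(&net, &data_set);
 *   lib4neuro::GradientDescentBB gd(1e-4,   // tolerance (epsilon)
 *                                   100,    // restart frequency
 *                                   50000,  // maximum number of iterations
 *                                   0);     // batch size (0 presumably meaning the full data set)
 *   gd.optimize(mse);  // an std::ofstream* may be passed as the second argument for logging
 *
 * After optimize() returns, the best parameters found are written back into
 * the error function via ef.set_parameters().
 */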
void GradientDescentBB::optimize(lib4neuro::ErrorFunction& ef,
std::ofstream* ofs) {

COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
COUT_INFO("Initial error: " << ef.eval() << std::endl);

if (ofs && ofs->is_open()) {
*ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
*ofs << "Initial error: " << ef.eval() << std::endl;
}
double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
double grad_norm_prev;
size_t i;
long long int iter_idx = this->maximum_niters;
size_t iter_counter = 0;

double prev_val, val = 0.0, c = 1.25, val_best;
size_t n_parameters = ef.get_dimension();

std::vector<double>* gradient_current(new std::vector<double>(n_parameters));
std::vector<double>* gradient_prev(new std::vector<double>(n_parameters));
std::vector<double>* params_current = new std::vector<double>(ef.get_parameters());
std::vector<double>* params_prev(new std::vector<double>(n_parameters));
std::vector<double>* params_best(new std::vector<double>(*params_current));
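/* two parameter buffers and two gradient buffers are kept and swapped each
 * iteration (see the pointer "switcheroo" below) to avoid reallocations;
 * params_best tracks the best iterate found so far */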

std::vector<double>* ptr_mem;

/* scratch storage for the dot products ||dx||^2, dx.dg and ||dg||^2 used by the step-length formula */
std::vector<double> dot__(3);
double d1, d2, d3, cc, gg;

std::fill(gradient_current->begin(),
gradient_current->end(),
0.0);
std::fill(gradient_prev->begin(),
gradient_prev->end(),
0.0);

val = ef.eval(params_current);
val_best = val;
double cooling_factor = 1.0;
while (grad_norm > this->tolerance && (iter_idx != 0)) {
iter_idx--;
iter_counter++;

grad_norm_prev = grad_norm;
/* reset of the current gradient */
std::fill(gradient_current->begin(),
gradient_current->end(),
0.0);
ef.calculate_error_gradient(*params_current,
*gradient_current,
1.0,
this->batch);

grad_norm = 0.0;
for (auto v: *gradient_current) {
grad_norm += v * v;
//COUT_DEBUG( grad_norm << std::endl );
}
grad_norm = std::sqrt(grad_norm);
/* Update of the parameters */
/* step length calculation */
if (iter_counter < 10 || iter_counter % this->restart_frequency < 10) {
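/* the adaptive formula below needs a reliable previous iterate/gradient pair,
 * so a small fixed step is used for the first few iterations and for a few
 * iterations after each restart point */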

/* fixed step length */
gamma = 0.1 * this->tolerance; /* small constant step tied to the stopping tolerance */
cooling_factor = 1.0;
} else {
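/* Barzilai-Borwein-type step: with s = x_k - x_{k-1} and y = g_k - g_{k-1},
 * the step length is taken proportional to (s.s)/(s.y), damped by 0.25 */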
std::fill(dot__.begin(),
dot__.end(),
0.0);

d1 = d2 = d3 = 0.0;
for (size_t d = 0; d < gradient_current->size(); d++) {
cc = params_current->at(d) - params_prev->at(d);
gg = gradient_current->at(d) - gradient_prev->at(d);

d1 += cc * cc;
d2 += cc * gg;
d3 += gg * gg;
}
dot__[0] = d1;
dot__[1] = d2;
dot__[2] = d3;
gamma = 1;
if (fabs(dot__[1]) > 0.0) {
gamma = 0.25 * (dot__[0] / dot__[1]);

}
}
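/* descent step: the candidate iterate x_{k+1} = x_k - cooling_factor * gamma * g_k
 * is written into params_prev and the buffers are swapped right below */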
for (i = 0; i < gradient_current->size(); ++i) {
(*params_prev)[i] = (*params_current)[i] - cooling_factor * gamma * (*gradient_current)[i];
}
/* switcheroo */
ptr_mem = gradient_prev;
gradient_prev = gradient_current;

gradient_current = ptr_mem;
ptr_mem = params_prev;
params_prev = params_current;

params_current = ptr_mem;
val = ef.eval(params_current);

/* keep track of the best parameters found so far */
if (val < val_best) {
val_best = val;
for (i = 0; i < gradient_current->size(); ++i) {
params_best->at(i) = params_current->at(i);

}
}
COUT_DEBUG(std::string("Iteration: ") << (unsigned int) (iter_counter)
<< ". Step size: " << gamma * cooling_factor

Michal Kravcenko
committed
<< ". C: " << c
<< ". Gradient norm: " << grad_norm
<< ". Total error: " << val << ". the lowest error: " << val_best

Michal Kravcenko
committed
WRITE_TO_OFS_DEBUG(ofs,
"Iteration: " << (unsigned int) (iter_counter)
<< ". Step size: " << gamma * cooling_factor
<< ". C: " << c
<< ". Gradient norm: " << grad_norm
<< ". Total error: " << val << ". the lowest error: " << val_best
<< "." << std::endl);

cooling_factor *= 0.99999;
}
COUT_DEBUG(std::string("Iteration: ") << (unsigned int) (iter_counter)
<< ". Step size: " << gamma * cooling_factor

Michal Kravcenko
committed
<< ". C: " << c
<< ". Gradient norm: " << grad_norm
<< ". Total error: " << val
<< "." << std::endl);
if (iter_idx == 0) {
COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters
<< ") was reached! Final error: " << val_best << std::endl);

if (ofs && ofs->is_open()) {
*ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: "
<< val_best << std::endl;

}
} else {
COUT_INFO(std::endl << "Gradient Descent method converged after "
<< this->maximum_niters - iter_idx
<< " iterations. Final error:" << val_best
<< std::endl);
#ifdef L4N_DEBUG

if (ofs && ofs->is_open()) {
*ofs << "Gradient Descent method converged after "

Michal Kravcenko
committed
<< " iterations."
<< std::endl;
}
#endif
}
this->optimal_parameters = *params_best;

ef.set_parameters(this->optimal_parameters);
delete gradient_current;
delete gradient_prev;
delete params_current;
delete params_prev;
delete params_best;