/**
 * Implementation of the GradientDescent optimizer: gradient descent with an
 * adaptive step length and a simple one-dimensional fallback step.
 *
 * @author Michal Kravčenko
 * @date 30.7.18 -
 */

#include <random.hpp>

#include "GradientDescent.h"  /* class declaration; header name assumed from the class name */
#include "message.h"

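/**
 * Constructs the gradient descent optimizer.
 *
 * @param epsilon tolerance for the gradient norm used as the stopping criterion
 * @param n_to_restart number of iterations after which the adaptive step length is reset
 * @param max_iters maximum number of iterations
 * @param batch batch size forwarded to the error-gradient evaluation
 */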
GradientDescent::GradientDescent(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
    this->tolerance = epsilon;
    this->restart_frequency = n_to_restart;
    this->optimal_parameters = new std::vector<double>(0);
    this->maximum_niters = max_iters;
    this->batch = batch;
}
GradientDescent::~GradientDescent() {
    if (this->optimal_parameters) {
        delete this->optimal_parameters;
        this->optimal_parameters = nullptr;
    }
}
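/**
 * Adapts the step length (gamma) based on the evolution of the error and of the
 * gradient norm. The auxiliary coefficient c is decreased slightly when the error
 * grew and increased slightly when it decreased, and the new step length follows
 *
 *   gamma <- gamma * c^(1 - 2*beta) * (||grad_prev|| / ||grad||)^(1 / c)
 *
 * @param gamma current step length (updated in place)
 * @param beta coefficient derived from the angle between two consecutive gradients
 * @param c auxiliary damping coefficient (updated in place)
 * @param grad_norm_prev norm of the previous gradient
 * @param grad_norm norm of the current gradient
 * @param fi current error value
 * @param fim previous error value
 */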
void GradientDescent::eval_step_size_mk(double &gamma,
                                        double beta,
                                        double &c,
                                        double grad_norm_prev,
                                        double grad_norm,
                                        double fi,
                                        double fim) {

    if (fi > fim) {
        c /= 1.0000005;
    } else if (fi < fim) {
        c *= 1.0000005;
    }

    gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
}
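/**
 * Fallback used when the full gradient step is not acceptable: a single randomly
 * chosen coordinate is updated along the negative direction vector, halving the
 * step coefficient until the error decreases or the coefficient underflows.
 *
 * @param ef error function to be evaluated
 * @param error_previous error value of the previous state
 * @param step_coefficient initial step coefficient
 * @param direction descent direction (typically the current gradient)
 * @param parameters_before parameter vector before the step
 * @param parameters_after output parameter vector after the step
 * @return true if a decreasing one-dimensional step was found, false otherwise
 */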
bool GradientDescent::perform_feasible_1D_step(
        lib4neuro::ErrorFunction &ef,
        double error_previous,
        double step_coefficient,
        std::vector<double> *direction,
        std::vector<double> *parameters_before,
        std::vector<double> *parameters_after
) {
    size_t i;

    boost::random::mt19937 gen(std::time(0));
    /* the distribution bounds are inclusive, so the upper bound must be size() - 1
     * to avoid indexing one element past the end of the direction vector */
    boost::random::uniform_int_distribution<> dis(0, direction->size() - 1);
    size_t max_dir_idx = dis(gen);

    double error_current = error_previous + 1.0;
    while (error_current >= error_previous) {
        (*parameters_after)[max_dir_idx] =
                (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx];

        error_current = ef.eval(parameters_after);

        if (step_coefficient < 1e-32) {
//            COUT_DEBUG("  Attempting to find a feasible direction in one dimension was NOT SUCCESSFUL" << std::endl);
            for (i = 0; i < direction->size(); ++i) {
                (*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i];
            }
            return false;
        } else {
            if (error_current >= error_previous) {
//                COUT_DEBUG("  Incorrect step size! Reducing it by a factor of 0.5. Errors: " << error_current << ", prev: " << error_previous << std::endl);
                step_coefficient *= 0.5;
            } else {
//                COUT_DEBUG("  Step OK" << std::endl);
            }
        }
    }
    return true;
}
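/**
 * Runs the gradient descent iterations: in each step the error gradient is
 * evaluated, the step length is either reset to a small fixed value (for the first
 * ten iterations and every restart_frequency-th iteration) or adapted via
 * eval_step_size_mk() from the angle between two consecutive gradients, and the
 * parameters are shifted against the gradient. The loop stops once the gradient
 * norm drops below the tolerance or the maximum number of iterations is reached;
 * the final parameters are stored in this->optimal_parameters.
 *
 * @param ef error function to be minimized
 * @param ofs optional output stream for logging
 */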
void GradientDescent::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {

    /* Copy data set max and min values, if it's normalized */
    if (ef.get_dataset()->is_normalized()) {
        ef.get_network_instance()->set_normalization_strategy_instance(
                ef.get_dataset()->get_normalization_strategy());
    }

    COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
    COUT_INFO("Initial error: " << ef.eval() << std::endl);

    if (ofs && ofs->is_open()) {
        *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
        *ofs << "Initial error: " << ef.eval() << std::endl;
    }
    double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
    double grad_norm_prev;
    size_t i;
    long long int iter_idx = this->maximum_niters;
    size_t iter_counter = 0;

    gamma = 1.0;
    double prev_val, val = 0.0, c = 1.25;

    size_t n_parameters = ef.get_dimension();

    std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
    std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
    std::vector<double> *params_current = ef.get_parameters();
    std::vector<double> *params_prev = new std::vector<double>(n_parameters);
    std::vector<double> *ptr_mem;

//    std::vector<double> gradient_mem( n_parameters );
//    std::vector<double> parameters_analytical( n_parameters );

    std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);

    val = ef.eval(params_current);
    double coeff = 1;
    bool it_analyzed = false;
    size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0;
    double cooling = 1.0;
    while (grad_norm > this->tolerance && (iter_idx != 0)) {
        iter_idx--;
        iter_counter++;
        prev_val = val;
        grad_norm_prev = grad_norm;

        /* reset of the current gradient */
        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
//        std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
        ef.calculate_error_gradient(*params_current, *gradient_current, 1.0, this->batch);
//        double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );

//        for(size_t k = 0; k < gradient_mem.size(); ++k){
//            printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
//        }
//        printf("---------------------\n");

        grad_norm = 0.0;
        for (auto v: *gradient_current) {
            grad_norm += v * v;
        }
        grad_norm = std::sqrt(grad_norm);

        /* Update of the parameters */
        /* step length calculation */
        if (iter_counter < 10 || iter_counter % this->restart_frequency == 0) {
            /* fixed step length */
            gamma = 0.1 * this->tolerance;
//            gamma = 1 / grad_norm;
//            gamma = 1e-4;
            cooling = 1.0;
        } else {
            /* angle between two consecutive gradients */
            sx = 0.0;
            for (i = 0; i < gradient_current->size(); ++i) {
                sx += (gradient_current->at(i) * gradient_prev->at(i));
            }
            sx /= grad_norm * grad_norm_prev;

            if (sx < -1.0 + 5e-12) {
                sx = -1 + 5e-12;
            } else if (sx > 1.0 - 5e-12) {
                sx = 1 - 5e-12;
            }

            beta = std::sqrt(std::acos(sx) / lib4neuro::PI);
            eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
        }

        for (i = 0; i < gradient_current->size(); ++i) {
            (*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i];
        }

        val = ef.eval(params_prev);
//        val = prev_val + 1.0;
//        coeff = 1;
//        it_analyzed = false;
//        while(val >= prev_val){
//            for (i = 0; i < gradient_current->size(); ++i) {
//                (*params_prev)[i] = (*params_current)[i] - coeff * gamma * (*gradient_current)[i];
//            }
//            val = ef.eval(params_prev);
//
//
//            if( coeff < 1e-32){
////                COUT_DEBUG("Error, the advised gradient direction is not feasible. Attempting to find a feasible direction in one dimension" << std::endl);
//                if( !this->perform_feasible_1D_step(ef, prev_val, gamma, gradient_current, params_current, params_prev) ){
//                    gamma = 1;
//                    counter_simplified_direction_bad++;
//                }
//                else{
//                    gamma = 1;
//                    counter_simplified_direction_good++;
//                }
//
//                break;
//            }
//            else{
//                if( val >= prev_val ){
////                    COUT_DEBUG("Incorrect step size! Reducing gamma. Errors: " << val << ", prev: " << prev_val << std::endl);
//                    coeff *= 0.5;
//
//                    if( !it_analyzed ){
//                        counter_bad_guesses++;
//                    }
//                }
//                else{
////                    COUT_DEBUG("Step OK" << std::endl);
//                    if( !it_analyzed ){
//                        counter_good_guesses++;
//                    }
//                }
//            }
//            it_analyzed = true;
//        }
//        gamma *= coeff;

        /* switcheroo */
        ptr_mem = gradient_prev;
        gradient_prev = gradient_current;
        gradient_current = ptr_mem;

        ptr_mem = params_prev;
        params_prev = params_current;
        params_current = ptr_mem;
        COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
                   << ". Step size: " << gamma * cooling
                   << ". C: " << c
                   << ". Gradient norm: " << grad_norm
                   << ". Total error: " << val
                   << "." << std::endl);

        WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter)
                           << ". Step size: " << gamma * cooling
                           << ". C: " << c
                           << ". Gradient norm: " << grad_norm
                           << ". Total error: " << val
                           << "." << std::endl);

//        if(iter_counter % 100 == 0){
//            COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter)
//                      << ". Step size: " << gamma
//                      << ". C: " << c
//                      << ". Gradient norm: " << grad_norm
//                      << ". Total error: " << val
//                      << ".\r");
//        }

        cooling *= 0.9999;
    }

    COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
               << ". Step size: " << gamma
               << ". C: " << c
               << ". Gradient norm: " << grad_norm
               << ". Total error: " << val
               << "." << std::endl);

    COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses
               << ", good: " << counter_good_guesses << ", bad: " << counter_bad_guesses
               << ", from which " << counter_simplified_direction_good + counter_simplified_direction_bad
               << " were attempted by simplified direction, success: " << counter_simplified_direction_good
               << ", fail: " << counter_simplified_direction_bad << std::endl << std::endl);

    if (iter_idx == 0) {
        COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl);

        if (ofs && ofs->is_open()) {
            *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl;
        }
    } else {
        COUT_INFO(std::endl << "Gradient Descent method converged after "
                  << this->maximum_niters - iter_idx
                  << " iterations. Final error: " << val
                  << std::endl);
#ifdef L4N_DEBUG
        if (ofs && ofs->is_open()) {
            *ofs << "Gradient Descent method converged after "
                 << this->maximum_niters - iter_idx
                 << " iterations."
                 << std::endl;
        }
#endif
    }

    *this->optimal_parameters = *params_current;

    delete gradient_current;
    delete gradient_prev;
    delete params_current;
    delete params_prev;
}
std::vector<double> *GradientDescent::get_parameters() {
    return this->optimal_parameters;
}
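/*
 * Usage sketch (illustrative only; the construction of the error function and any
 * default arguments of optimize() depend on the rest of the lib4neuro API and are
 * assumptions here, not part of this file):
 *
 *   GradientDescent gd(1e-6, 200, 100000, 0);  // tolerance, restart frequency, max iterations, batch size
 *   gd.optimize(mse, nullptr);                  // mse: a previously prepared lib4neuro::ErrorFunction
 *   std::vector<double> *result = gd.get_parameters();
 *   // note: the returned pointer refers to the optimizer's internal storage and is
 *   // freed in the GradientDescent destructor
 */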
