/**
 * Implementation of the gradient descent optimization method
 * with an adaptive step length.
 *
 * @author Michal Kravčenko
 * @date 30.7.18 -
 */
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <vector>

#include "GradientDescent.h"

/* fallback in case PI is not supplied by the project's headers */
#ifndef PI
#define PI 3.141592653589793
#endif

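/**
 * @param epsilon stopping tolerance: optimization ends once the gradient
 *        norm falls below this value
 * @param n_to_restart number of iterations after which the step length is
 *        reset to a fixed fraction of the tolerance
 */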
GradientDescent::GradientDescent(double epsilon, size_t n_to_restart) {
    this->tolerance = epsilon;
    this->restart_frequency = n_to_restart;
    this->optimal_parameters = new std::vector<double>(0);
}

GradientDescent::~GradientDescent() {
    if (this->optimal_parameters) {
        delete this->optimal_parameters;
        this->optimal_parameters = nullptr;
    }
}
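
/**
 * Adaptive step-length update. The contraction factor c grows when the error
 * decreased between iterations and shrinks when it increased; the step length
 * is then rescaled as
 *
 *     gamma <- gamma * c^(1 - 2 * beta) * (grad_norm_prev / grad_norm)^(1 / c)
 *
 * where beta encodes the angle between two consecutive gradients.
 *
 * @param gamma step length (updated in place)
 * @param beta sqrt(angle between consecutive gradients / PI)
 * @param c contraction factor (updated in place)
 * @param grad_norm_prev norm of the previous gradient
 * @param grad_norm norm of the current gradient
 * @param fi current error value
 * @param fim previous error value
 */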
void GradientDescent::eval_step_size_mk(double &gamma, double beta, double &c, double grad_norm_prev,
                                        double grad_norm, double fi, double fim) {
    if (fi > fim) {
        c /= 1.0000005;
    } else if (fi < fim) {
        c *= 1.0000005;
    }

    gamma *= std::pow(c, 1.0 - 2.0 * beta) * std::pow(grad_norm_prev / grad_norm, 1.0 / c);
}
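
/**
 * Runs the descent until the gradient norm drops below the tolerance given in
 * the constructor. For the first few iterations (iter_idx < 10), and on every
 * iteration index divisible by restart_frequency, a small fixed step length is
 * used; otherwise the step length is adapted via eval_step_size_mk.
 */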
void GradientDescent::optimize(ErrorFunction &ef) {

    std::cout << "Finding a solution via a Gradient Descent method with adaptive step-length" << std::endl;
    std::cout << "********************************************************************************************************************************************" << std::endl;

    double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
    double grad_norm_prev;
    size_t i, iter_idx = 0;
    gamma = 1.0;
    double prev_val, val = 0.0, c = 1.25;

    size_t n_parameters = ef.get_dimension();

    std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
    std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
    std::vector<double> *params_current = ef.get_parameters();
    std::vector<double> *params_prev = new std::vector<double>(n_parameters);
    std::vector<double> *ptr_mem;

    std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
    std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);

    while (grad_norm > this->tolerance) {
        iter_idx++;
        prev_val = val;
        grad_norm_prev = grad_norm;

        /* reset of the current gradient */
        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
        ef.calculate_error_gradient(*params_current, *gradient_current);

        grad_norm = 0.0;
        for (auto v : *gradient_current) {
            grad_norm += v * v;
        }
        grad_norm = std::sqrt(grad_norm);

        /* step length calculation */
        if (iter_idx < 10 || iter_idx % this->restart_frequency == 0) {
            /* restart: fall back to a small fixed step length */
            gamma = 0.1 * this->tolerance;
        } else {
            /* angle between two consecutive gradients */
            sx = 0.0;
            for (i = 0; i < gradient_current->size(); ++i) {
                sx += (gradient_current->at(i) * gradient_prev->at(i));
            }
            sx /= grad_norm * grad_norm_prev;
            beta = std::sqrt(std::acos(sx) / PI);

            eval_step_size_mk(gamma, beta, c, grad_norm_prev, grad_norm, val, prev_val);
        }

        /* update of the parameters: descend along the current gradient */
        for (i = 0; i < gradient_current->size(); ++i) {
            (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
        }

        /* swap the current and previous buffers instead of copying them */
        ptr_mem = gradient_prev;
        gradient_prev = gradient_current;
        gradient_current = ptr_mem;

        ptr_mem = params_prev;
        params_prev = params_current;
        params_current = ptr_mem;

        val = ef.eval(params_current);

        /* progress report; the modulus can be raised to print less often */
        if (iter_idx % 1 == 0) {
            printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\r",
                   (int)iter_idx, gamma, c, grad_norm, val);
            std::cout.flush();
        }
    }
    printf("Iteration %12d. Step size: %15.8f, C: %15.8f, Gradient norm: %15.8f. Total error: %10.8f\n",
           (int)iter_idx, gamma, c, grad_norm, val);
    std::cout.flush();

    *this->optimal_parameters = *params_current;

    delete gradient_current;
    delete gradient_prev;
    delete params_current;
    delete params_prev;
}
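
/**
 * @return pointer to the internally stored parameter vector produced by the
 *         most recent call to optimize
 */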
std::vector<double>* GradientDescent::get_parameters() {
    return this->optimal_parameters;
}
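
/*
 * Minimal usage sketch (illustration only, not part of the original file).
 * `SomeErrorFunction` stands for any concrete ErrorFunction implementation
 * exposing the interface used above (get_dimension, get_parameters,
 * calculate_error_gradient, eval); the name and its constructor arguments
 * are hypothetical.
 *
 *     SomeErrorFunction ef(...);                     // hypothetical error function
 *     GradientDescent gd(1e-4, 200);                 // tolerance, restart frequency
 *     gd.optimize(ef);                               // iterate until ||grad|| < 1e-4
 *     std::vector<double> *w = gd.get_parameters();  // optimized parameters
 */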