/**
 * Gradient descent optimizer with an adaptive step length: each iteration
 * accumulates the gradients of the training elements whose error exceeds the
 * tolerance and takes one descent step along their sum.
 *
 * @author Michal Kravčenko
 * @date 19.2.19 -
 */
#include "GradientDescentSingleItem.h"
#include <random.hpp>
#include "message.h"
namespace lib4neuro {
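    /**
     * @param epsilon per-item error tolerance below which an element counts as solved
     * @param n_to_restart restart frequency of the method (stored in restart_frequency)
     * @param max_iters hard limit on the number of iterations of the main loop
     * @param batch batch size (stored; not referenced elsewhere in this file)
     */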
    GradientDescentSingleItem::GradientDescentSingleItem(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
        this->tolerance = epsilon;
        this->restart_frequency = n_to_restart;
        this->optimal_parameters = new std::vector<double>(0);
        this->maximum_niters = max_iters;
        this->batch = batch;
    }
    GradientDescentSingleItem::~GradientDescentSingleItem() {
        if (this->optimal_parameters) {
            delete this->optimal_parameters;
            this->optimal_parameters = nullptr;
        }
    }
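    /**
     * Backtracking line search: starting from a unit step, repeatedly halves
     * the step length alpha until f(x - alpha * d) < f(x). Assumes d is a
     * descent direction; otherwise the loop does not terminate.
     *
     * @param f error function being minimized
     * @param x current parameter vector
     * @param d search direction (accumulated gradient)
     * @param n_elems number of elements that contributed to d (unused)
     * @return the accepted step length alpha
     */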
    double GradientDescentSingleItem::get_optimal_step_size(lib4neuro::ErrorFunction &f, std::vector<double> &x,
                                                            std::vector<double> &d, size_t n_elems) {
        double alpha = 1.0; // initial trial step; halved below until the error decreases
        double value = f.eval();
        double value_shifted = value + 1.0; // forces at least one halving before acceptance

        std::vector<double> shifted_x(x);
        while (value_shifted > value) {
            alpha *= 0.5;
            for (size_t i = 0; i < x.size(); ++i) {
                shifted_x[i] = x[i] - alpha * d[i];
            }
            value_shifted = f.eval(&shifted_x);
        }
        // std::cout << "Error reduction: " << value - value_shifted << std::endl;

        return alpha;
    }
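    /**
     * Main loop: in every iteration, evaluates each training element
     * separately, sums the error gradients of the elements whose error exceeds
     * the tolerance, and takes one descent step along the summed direction
     * with a step length chosen by get_optimal_step_size(). Stops once the
     * largest per-item error drops below the tolerance or maximum_niters
     * iterations have been performed.
     */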
    void GradientDescentSingleItem::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {
        COUT_INFO("Finding a solution via a Gradient Descent [Single Item] method with adaptive step-length..." << std::endl);
        COUT_INFO("Initial error: " << ef.eval() << std::endl);

        size_t total_elements = ef.get_dataset()->get_n_elements(), updated_elements = 0, iter = 0;
        double max_error = 1.0, error, gamma;
        size_t iter_idx = this->maximum_niters;
        size_t dim = ef.get_network_instance()->get_n_biases() + ef.get_network_instance()->get_n_weights();

        std::vector<double> parameter_vector = *ef.get_parameters();
        std::vector<double> gradient_vector(dim);
        std::vector<double> search_direction(dim);
        std::vector<double> error_vector(ef.get_network_instance()->get_n_outputs());

        while (max_error >= this->tolerance && iter_idx >= 1) {
            iter_idx--;
            iter++;

            max_error = 0.0;
            updated_elements = 0;
            std::fill(search_direction.begin(), search_direction.end(), 0.0);

            /* Accumulate gradients only from the elements whose error exceeds the tolerance. */
            for (size_t i = 0; i < ef.get_dataset()->get_n_elements(); ++i) {
                error = ef.eval_single_item_by_idx(i, &parameter_vector, error_vector);

                if (error > max_error) {
                    max_error = error;
                }

                if (error > this->tolerance) {
                    updated_elements++;
                    ef.calculate_error_gradient_single(error_vector, gradient_vector);

                    for (size_t j = 0; j < dim; ++j) {
                        search_direction[j] += gradient_vector[j];
                    }
                }
            }

            /* Line search along the accumulated direction, then one descent step. */
            gamma = this->get_optimal_step_size(ef, parameter_vector, search_direction, updated_elements);
            for (size_t j = 0; j < dim; ++j) {
                parameter_vector[j] -= gamma * search_direction[j];
            }

            COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << "\r");
        }
        COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << std::endl);
    }
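    /**
     * @return pointer to the internally stored parameter vector; note that
     * optimize() above does not currently write its result into this member.
     */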
    std::vector<double> *GradientDescentSingleItem::get_parameters() {
        return this->optimal_parameters;
    }
}
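
/*
 * Example usage (a minimal sketch, not part of this translation unit; it
 * assumes other lib4neuro types such as NeuralNetwork, DataSet and MSE, and
 * the constructor arguments shown are purely illustrative):
 *
 *     lib4neuro::NeuralNetwork net;
 *     // ... build the network topology, e.g. via add_layer calls ...
 *     lib4neuro::DataSet data(&data_vector);   // data_vector: input/output pairs
 *     lib4neuro::MSE mse(&net, &data);         // error function over the data set
 *
 *     // tolerance 1e-4, restart frequency 10, at most 1000 iterations, batch 0
 *     lib4neuro::GradientDescentSingleItem gd(1e-4, 10, 1000, 0);
 *     gd.optimize(mse, nullptr);
 */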