/**
 * Implementation of a gradient descent optimizer with an adaptive step length
 * driven by the errors of individual training elements.
 *
 * @author Michal Kravčenko
 * @date 19.2.19 -
 */
#include "GradientDescentSingleItem.h"
#include <random.hpp>
#include "message.h"
namespace lib4neuro {
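    /**
     * Constructs the optimizer.
     *
     * @param epsilon Tolerance on the per-element error; an element is
     *                considered fitted once its error drops below it.
     * @param n_to_restart Restart frequency of the method.
     * @param max_iters Maximum number of outer iterations.
     * @param batch Batch size.
     */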
    GradientDescentSingleItem::GradientDescentSingleItem(double epsilon,
                                                         size_t n_to_restart,
                                                         int max_iters,
                                                         size_t batch) {
        this->tolerance = epsilon;
        this->restart_frequency = n_to_restart;

        this->maximum_niters = max_iters;
        this->batch = batch;
    }
    GradientDescentSingleItem::~GradientDescentSingleItem() {
        // Nothing to release: the optimal parameters are stored by value.
    }
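
    /**
     * Chooses a step length along the descent direction d by a simple
     * backtracking line search: the trial step is halved until the shifted
     * parameters x - alpha * d produce a lower error than the current one.
     *
     * @param f Error function to evaluate.
     * @param x Current parameter vector.
     * @param d Descent direction.
     * @param n_elems Number of contributing elements (currently unused).
     * @return The accepted step length alpha.
     */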
    double GradientDescentSingleItem::get_optimal_step_size(lib4neuro::ErrorFunction& f,
                                                            std::vector<double>& x,
                                                            std::vector<double>& d,
                                                            size_t n_elems) {
        // Initial trial step. An earlier per-element scaling (10.0 / n_elems)
        // was immediately overwritten and has been removed as dead code.
        double alpha = 1.0;

        double value = f.eval();
        double value_shifted = value + 1.0;

        std::vector<double> shifted_x(x);
        while (value_shifted > value) {
            alpha *= 0.5;

            for (size_t i = 0; i < x.size(); ++i) {
                shifted_x[i] = x[i] - alpha * d[i];
            }

            value_shifted = f.eval(&shifted_x);
        }

        return alpha;
    }
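
    /**
     * Runs the single-item gradient descent: each iteration sums the gradient
     * contributions of all training elements whose error exceeds the tolerance
     * into one search direction, then takes a single step with an adaptively
     * chosen length. Terminates when the maximum per-element error falls below
     * the tolerance or the iteration limit is reached.
     */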
    void GradientDescentSingleItem::optimize(lib4neuro::ErrorFunction& ef,
                                             std::ofstream* ofs) {
        COUT_INFO("Finding a solution via a Gradient Descent [Single Item] method with adaptive step-length..." << std::endl);
        COUT_INFO("Initial error: " << ef.eval() << std::endl);

        size_t total_elements = ef.get_dataset()->get_n_elements(), updated_elements = 0, iter = 0;
        double max_error = 1.0, error, gamma;
        size_t iter_idx = this->maximum_niters;
        size_t dim = ef.get_network_instance()->get_n_biases() + ef.get_network_instance()->get_n_weights();

        std::vector<double> parameter_vector = *ef.get_parameters();
        std::vector<double> gradient_vector(dim);
        std::vector<double> search_direction(dim);
        std::vector<double> error_vector(ef.get_network_instance()->get_n_outputs());
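
        // Outer loop: iterate until every training element's error is below the
        // tolerance or the iteration budget is exhausted.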
        while (max_error >= this->tolerance && iter_idx >= 1) {
            iter_idx--;
            iter++;

            max_error = 0.0;
            updated_elements = 0;
            std::fill(search_direction.begin(), search_direction.end(), 0);
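
            // Accumulate gradient contributions from every element whose error
            // still exceeds the tolerance; sufficiently fitted elements are skipped.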
            for (size_t i = 0; i < ef.get_dataset()->get_n_elements(); ++i) {
                error = ef.eval_single_item_by_idx(i, &parameter_vector, error_vector);
                if (error > max_error) {
                    max_error = error;
                }

                if (error > this->tolerance) {
                    updated_elements++;
                    ef.calculate_error_gradient_single(error_vector, gradient_vector);
                    for (size_t j = 0; j < dim; ++j) {
                        search_direction[j] += gradient_vector[j];
                    }
                }
            }
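
            // One descent step along the accumulated direction; the step length
            // is chosen by the backtracking line search above.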
            gamma = this->get_optimal_step_size(ef, parameter_vector, search_direction, updated_elements);
            for (size_t j = 0; j < dim; ++j) {
                parameter_vector[j] -= gamma * search_direction[j];
            }

            COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << "\r");
        }
        COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << std::endl);

        this->optimal_parameters = parameter_vector;
        ef.get_network_instance()->copy_parameter_space(&this->optimal_parameters);

    }
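
    /**
     * Returns the parameter vector found by the last call to optimize().
     *
     * A minimal usage sketch (assumes an already constructed
     * lib4neuro::ErrorFunction `ef`; the constructor arguments are
     * illustrative, not recommended defaults):
     *
     *   GradientDescentSingleItem gd(1e-4, 10, 1000, 0);
     *   gd.optimize(ef);
     *   std::shared_ptr<std::vector<double>> params = gd.get_parameters();
     */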
    std::shared_ptr<std::vector<double>> GradientDescentSingleItem::get_parameters() {
        // Return a copy of the stored parameters. The previous implementation
        // reset a shared_ptr to the address of a member variable, which would
        // cause a double free once the returned pointer released it.
        return std::make_shared<std::vector<double>>(this->optimal_parameters);
    }
}