Newer
Older
//
// Created by martin on 7/15/18.
//
#include <cmath>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
Martin Beseda
committed
#include "exceptions.h"
#include "message.h"
Martin Beseda
committed
namespace lib4neuro {

Michal Kravcenko
committed
Martin Beseda
committed
size_t ErrorFunction::get_dimension() {
return this->dimension;
}
/**
 * Gives access to the neural network attached to this error function.
 *
 * @return The stored `net` pointer, returned as-is
 */
NeuralNetwork* ErrorFunction::get_network_instance() {
    return net;
}
void ErrorFunction::divide_data_train_test(double percent_test) {
size_t ds_size = this->ds->get_n_elements();
/* Store the full data set */
this->ds_full = this->ds;
/* Choose random subset of the DataSet for training and the remaining part for validation */
boost::random::mt19937 gen;
boost::random::uniform_int_distribution<> dist(0, ds_size - 1);
size_t test_set_size = ceil(ds_size * percent_test);
std::vector<unsigned int> test_indices;
test_indices.reserve(test_set_size);
for (unsigned int i = 0; i < test_set_size; i++) {
test_indices.emplace_back(dist(gen));
}
std::sort(test_indices.begin(), test_indices.end(), std::greater<unsigned int>());
std::vector<std::pair<std::vector<double>, std::vector<double>>> test_data, train_data;
/* Copy all the data to train_data */
Martin Beseda
committed
for (auto e : *this->ds_full->get_data()) {
/* Move the testing data from train_data to test_data */
Martin Beseda
committed
for (auto ind : test_indices) {
test_data.emplace_back(train_data.at(ind));
train_data.erase(train_data.begin() + ind);
}
/* Re-initialize data set for training */
this->ds = new DataSet(&train_data, this->ds_full->get_normalization_strategy());
this->ds_test = new DataSet(&test_data, this->ds_full->get_normalization_strategy());
}
/**
 * NOTE(review): judging by its name, this method is meant to switch training back
 * to the full data set. Only the signature and the first condition are visible in
 * this view, so the actual behaviour cannot be confirmed from here.
 *
 * NOTE(review): the lines "Martin Beseda" / "committed" below are not C++; they look
 * like version-control blame residue - confirm against the original file.
 */
void ErrorFunction::return_full_data_set_for_training() {
Martin Beseda
committed
/* Guard: the following (not visible) statements run only when a test data set pointer is set */
if (this->ds_test) {
Martin Beseda
committed
/**
 * Gives access to the data set currently used by this error function.
 *
 * @return The stored `ds` pointer, returned as-is
 */
DataSet* ErrorFunction::get_dataset() {
    return ds;
}
/**
 * Gives access to the test data set of this error function.
 *
 * @return The stored `ds_test` pointer, returned as-is
 */
DataSet* ErrorFunction::get_test_dataset() {
    return ds_test;
}
/**
 * Gathers the network parameters into one newly allocated vector.
 *
 * The vector is sized to `get_n_weights() + get_n_biases()` and filled with the
 * weights first, followed by the biases.
 *
 * @return Pointer to a vector allocated with `new`
 */
std::vector<double>* ErrorFunction::get_parameters() {
    auto* params = new std::vector<double>(this->net->get_n_weights() + this->net->get_n_biases());

    /* Single write cursor shared by both passes so the biases follow the weights */
    size_t pos = 0;

    for (auto weight: *this->net->get_parameter_ptr_weights()) {
        params->at(pos) = weight;
        pos++;
    }

    for (auto bias: *this->net->get_parameter_ptr_biases()) {
        params->at(pos) = bias;
        pos++;
    }

    return params;
}
/**
 * Creates a mean-squared-error function bound to the given network and data set.
 *
 * The dimension of the error function is set to the number of weights plus the
 * number of biases of the network.
 *
 * @param net Neural network to be evaluated; stored as a raw pointer
 * @param ds  Data set the error is computed on; stored as a raw pointer
 */
MSE::MSE(NeuralNetwork* net, DataSet* ds) {
    this->net = net;
    this->ds = ds;
    this->dimension = net->get_n_weights() + net->get_n_biases();
}
/**
 * Evaluates the mean squared error of the network on the given data set and
 * writes the per-element values to an already opened output stream.
 *
 * For every element the network output is computed by `eval_single` and the
 * squared differences between predicted and expected outputs are accumulated.
 * The sum is divided by `data_set->get_n_elements()`.
 *
 * @param data_set          Data set to evaluate the network on
 * @param results_file_path Output stream receiving the header line, one line per
 *                          data element and the final "MSE = ..." line
 * @param weights           Passed through unchanged to `NeuralNetwork::eval_single`
 * @return Accumulated squared error divided by the number of elements
 */
double MSE::eval_on_data_set(lib4neuro::DataSet* data_set, std::ofstream* results_file_path,
                             std::vector<double>* weights) {
    //TODO do NOT duplicate code - rewrite the function in a better way
    const size_t dim_out = data_set->get_output_dim();
    const size_t n_elements = data_set->get_n_elements();
    double error = 0.0;

    std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data();

    //TODO instead use something smarter
    std::vector<double> output(dim_out);

    COUT_DEBUG(
            "Evaluation of the error function MSE on the given data-set (format 'input' 'real output' 'predicted output'):"
                    << std::endl);

    *results_file_path << "[Element index] [Input] [Real output] [Predicted output]" << std::endl;

    /* Unsigned index: size() is unsigned, so a signed counter would trigger a mixed-sign comparison */
    for (size_t i = 0; i < data->size(); ++i) {  // Iterate through every element in the test set
        /* Look the element up once instead of on every access below */
        auto& element = data->at(i);

        /* Compute the net output and store it into 'output' variable */
        this->net->eval_single(element.first,
                               output,
                               weights);

        /* Compute difference for every element of the output vector */
        // NOTE(review): the input vector is indexed by the OUTPUT index j below; at() throws
        // std::out_of_range when the input is shorter than dim_out - confirm this is intended.
        for (size_t j = 0; j < dim_out; ++j) {
            COUT_DEBUG("Element " << i << ": "
                                  << element.first.at(j) << " "
                                  << element.second.at(j) << " "
                                  << output.at(j) << std::endl);

            *results_file_path << "Element " << i << ": "
                               << element.first.at(j) << " "
                               << element.second.at(j) << " "
                               << output.at(j);

            const double val = output.at(j) - element.second.at(j);
            error += val * val;
        }

        *results_file_path << std::endl;
    }

    const double result = error / n_elements;

    *results_file_path << "MSE = " << result << std::endl;

    return result;
}
/**
 * Evaluates the mean squared error of the network on the given data set and
 * writes the per-element values into a file.
 *
 * The file is opened for writing; a runtime error is raised through
 * THROW_RUNTIME_ERROR when it cannot be opened. For every element the network
 * output is computed by `eval_single` and the squared differences between
 * predicted and expected outputs are accumulated. The sum is divided by
 * `data_set->get_n_elements()`.
 *
 * @param data_set          Data set to evaluate the network on
 * @param results_file_path Path of the file the results are written to
 * @param weights           Passed through unchanged to `NeuralNetwork::eval_single`
 * @return Accumulated squared error divided by the number of elements
 */
double MSE::eval_on_data_set(DataSet* data_set, std::string results_file_path, std::vector<double>* weights) {
    //TODO do NOT duplicate code - rewrite the function in a better way
    const size_t dim_out = data_set->get_output_dim();
    const size_t n_elements = data_set->get_n_elements();
    double error = 0.0;

    std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data();

    //TODO instead use something smarter
    std::vector<double> output(dim_out);

    COUT_DEBUG(
            "Evaluation of the error function MSE on the given data-set (format 'input' 'real output' 'predicted output'):"
                    << std::endl);

    std::ofstream ofs(results_file_path);
    if (!ofs.is_open()) {
        THROW_RUNTIME_ERROR("File path: " + results_file_path + " was not successfully opened!");
    }

    ofs << "[Input] [Real output] [Predicted output]" << std::endl;

    /* Unsigned index: size() is unsigned, so a signed counter would trigger a mixed-sign comparison */
    for (size_t i = 0; i < data->size(); ++i) {  // Iterate through every element in the test set
        /* Look the element up once instead of on every access below */
        auto& element = data->at(i);

        /* Compute the net output and store it into 'output' variable */
        this->net->eval_single(element.first,
                               output,
                               weights);

        /* Compute difference for every element of the output vector */
        // NOTE(review): the input vector is indexed by the OUTPUT index j below; at() throws
        // std::out_of_range when the input is shorter than dim_out - confirm this is intended.
        for (size_t j = 0; j < dim_out; ++j) {
            COUT_DEBUG("Element " << i << ": "
                                  << element.first.at(j) << " "
                                  << element.second.at(j) << " "
                                  << output.at(j) << std::endl);

            ofs << element.first.at(j) << " "
                << element.second.at(j) << " "
                << output.at(j) << std::endl;

            const double val = output.at(j) - element.second.at(j);
            error += val * val;
        }

        ofs << std::endl;
    }

    ofs.close();

    return error / n_elements;
}
/**
 * Evaluates the mean squared error of the network on the given data set.
 *
 * For every element the network output is computed by `eval_single` and the
 * squared differences between predicted and expected outputs are accumulated.
 * The sum is divided by `data_set->get_n_elements()`.
 *
 * @param data_set Data set to evaluate the network on
 * @param weights  Passed through unchanged to `NeuralNetwork::eval_single`
 * @return Accumulated squared error divided by the number of elements
 */
double MSE::eval_on_data_set(DataSet* data_set, std::vector<double>* weights) {
    const size_t dim_out = data_set->get_output_dim();
    const size_t n_elements = data_set->get_n_elements();
    double error = 0.0;

    std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data();

    //TODO instead use something smarter
    std::vector<double> output(dim_out);

    COUT_DEBUG(
            "Evaluation of the error function MSE on the given data-set (format 'input' 'real output' 'predicted output'):"
                    << std::endl);

    /* Unsigned index: size() is unsigned, so a signed counter would trigger a mixed-sign comparison */
    for (size_t i = 0; i < data->size(); ++i) {  // Iterate through every element in the test set
        /* Look the element up once instead of on every access below */
        auto& element = data->at(i);

        /* Compute the net output and store it into 'output' variable */
        this->net->eval_single(element.first,
                               output,
                               weights);

        /* Compute difference for every element of the output vector */
        // NOTE(review): the debug print indexes the input vector by the OUTPUT index j; at() throws
        // std::out_of_range when the input is shorter than dim_out - confirm this is intended.
        for (size_t j = 0; j < dim_out; ++j) {
            COUT_DEBUG("Element " << i << ": "
                                  << element.first.at(j) << " "
                                  << element.second.at(j) << " "
                                  << output.at(j) << std::endl);

            const double val = output.at(j) - element.second.at(j);
            error += val * val;
        }
    }

    return error / n_elements;
}
/**
 * Evaluates the mean squared error of the network on the data set `ds`.
 *
 * For every element the network output is computed by `eval_single` and the
 * squared differences between predicted and expected outputs are accumulated.
 * The sum is divided by `ds->get_n_elements()`.
 *
 * @param weights Passed through unchanged to `NeuralNetwork::eval_single`
 * @return Accumulated squared error divided by the number of elements
 */
double MSE::eval(std::vector<double>* weights) {
    const size_t dim_out = this->ds->get_output_dim();
    const size_t n_elements = this->ds->get_n_elements();
    double error = 0.0;

    std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data();

    //TODO instead use something smarter
    std::vector<double> output(dim_out);

    /* Iterate by reference: iterating by value would copy both vectors of every element */
    for (auto& el: *data) {  // Iterate through every element in the test set
        this->net->eval_single(el.first, output,
                               weights);  // Compute the net output and store it into 'output' variable

        for (size_t j = 0; j < dim_out; ++j) {  // Compute difference for every element of the output vector
            const double val = output.at(j) - el.second.at(j);
            error += val * val;
        }
    }

    return error / n_elements;
}
/**
 * Evaluates the mean squared error on the test data set `ds_test`.
 *
 * @param weights Passed through unchanged to `eval_on_data_set`
 * @return Result of `eval_on_data_set` for `ds_test`
 */
double MSE::eval_on_test_data(std::vector<double>* weights) {
    return this->eval_on_data_set(this->ds_test, weights);
}
double MSE::eval_on_test_data(std::string results_file_path, std::vector<double>* weights) {
return this->eval_on_data_set(this->ds_test, results_file_path, weights);
}
double MSE::eval_on_test_data(std::ofstream* results_file_path, std::vector<double>* weights) {
return this->eval_on_data_set(this->ds_test, results_file_path, weights);
}
Martin Beseda
committed
Martin Beseda
committed
/**
 * Adds the gradient of the mean squared error to `grad`.
 *
 * For every processed element the network output is computed, turned into the
 * derivative of the squared error `2 * (output - expected)` and handed to
 * `add_to_gradient_single` with the weight `alpha / n_elements`.
 *
 * @param params Network parameters; their address is passed to `eval_single`
 * @param grad   Gradient vector that `add_to_gradient_single` accumulates into
 * @param alpha  Scaling coefficient of the contribution
 * @param batch  When non-zero, a random batch obtained from
 *               `get_random_data_batch(batch)` is processed instead of the whole data set
 */
void
MSE::calculate_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) {
    const size_t dim_out = this->ds->get_output_dim();
    size_t n_elements = this->ds->get_n_elements();

    std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data();

    /* The batch lives in a local container. Assigning it through the pointer returned by
     * get_data() would overwrite whatever that pointer refers to - presumably the data set's
     * own storage (TODO confirm) - with the batch. */
    std::vector<std::pair<std::vector<double>, std::vector<double>>> batch_data;
    if (batch) {
        batch_data = this->ds->get_random_data_batch(batch);
        data = &batch_data;
        n_elements = data->size();
    }

    std::vector<double> error_derivative(dim_out);

    /* Iterate by reference: iterating by value would copy both vectors of every element */
    for (auto& el: *data) {  // Iterate through every element in the test set
        this->net->eval_single(el.first, error_derivative,
                               &params);  // Compute the net output and store it into 'output' variable

        for (size_t j = 0; j < dim_out; ++j) {
            error_derivative[j] = 2.0 * (error_derivative[j] - el.second[j]); //real - expected result
        }

        this->net->add_to_gradient_single(el.first, error_derivative, alpha / n_elements, grad);
    }
}
/**
 * Creates an empty sum of error functions: zero dimension, and neither the
 * summand container nor the coefficient container is allocated yet.
 */
ErrorSum::ErrorSum() {
    dimension = 0;
    summand = nullptr;
    summand_coefficient = nullptr;
}
Martin Beseda
committed
/**
 * Releases the summand container and the coefficient container.
 *
 * Only the two containers are deleted; the ErrorFunction objects the stored
 * pointers refer to are not deleted here.
 */
ErrorSum::~ErrorSum() {
    /* `delete` on a null pointer is a no-op, so no explicit null checks are needed */
    delete this->summand;
    delete this->summand_coefficient;
}
double ErrorSum::eval_on_test_data(std::vector<double>* weights) {
//TODO take care of the case, when there are no test data
Martin Beseda
committed
ErrorFunction* ef = nullptr;

Michal Kravcenko
committed
Martin Beseda
committed
for (unsigned int i = 0; i < this->summand->size(); ++i) {

Michal Kravcenko
committed
if (ef) {
output += ef->eval_on_test_data(weights) * this->summand_coefficient->at(i);
}

Michal Kravcenko
committed
}

Michal Kravcenko
committed
}
Martin Beseda
committed
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/**
 * Not implemented for ErrorSum: invokes THROW_NOT_IMPLEMENTED_ERROR().
 *
 * @param results_file_path Unused
 * @param weights           Unused
 * @return -1 if control ever gets past the macro
 */
double ErrorSum::eval_on_test_data(std::string results_file_path, std::vector<double>* weights) {
    THROW_NOT_IMPLEMENTED_ERROR();

    return -1.0;
}
/**
 * Not implemented for ErrorSum: invokes THROW_NOT_IMPLEMENTED_ERROR().
 *
 * @param results_file_path Unused
 * @param weights           Unused
 * @return -1 if control ever gets past the macro
 */
double ErrorSum::eval_on_test_data(std::ofstream* results_file_path, std::vector<double>* weights) {
    THROW_NOT_IMPLEMENTED_ERROR();

    return -1.0;
}
/**
 * Not implemented for ErrorSum: invokes THROW_NOT_IMPLEMENTED_ERROR().
 *
 * @param data_set Unused
 * @param weights  Unused
 * @return -1 if control ever gets past the macro
 */
double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::vector<double>* weights) {
    THROW_NOT_IMPLEMENTED_ERROR();

    return -1.0;
}
/**
 * Not implemented for ErrorSum: invokes THROW_NOT_IMPLEMENTED_ERROR().
 *
 * @param data_set          Unused
 * @param results_file_path Unused
 * @param weights           Unused
 * @return -1 if control ever gets past the macro
 */
double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::string results_file_path,
                                  std::vector<double>* weights) {
    THROW_NOT_IMPLEMENTED_ERROR();

    return -1.0;
}
/**
 * Not implemented for ErrorSum: invokes THROW_NOT_IMPLEMENTED_ERROR().
 *
 * @param data_set          Unused
 * @param results_file_path Unused
 * @param weights           Unused
 * @return -1 if control ever gets past the macro
 */
double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::ofstream* results_file_path,
                                  std::vector<double>* weights) {
    THROW_NOT_IMPLEMENTED_ERROR();

    return -1.0;
}
double ErrorSum::eval(std::vector<double>* weights) {
Martin Beseda
committed
double output = 0.0;
Martin Beseda
committed
ErrorFunction* ef = nullptr;
Martin Beseda
committed
for (unsigned int i = 0; i < this->summand->size(); ++i) {
if (ef) {
output += ef->eval(weights) * this->summand_coefficient->at(i);
}
Martin Beseda
committed
}
Martin Beseda
committed
return output;
Martin Beseda
committed
void ErrorSum::calculate_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha,
size_t batch) {

Michal Kravcenko
committed
Martin Beseda
committed
ErrorFunction* ef = nullptr;
for (size_t i = 0; i < this->summand->size(); ++i) {
ef = this->summand->at(i);

Michal Kravcenko
committed
ef->calculate_error_gradient(params, grad, this->summand_coefficient->at(i) * alpha, batch);

Michal Kravcenko
committed
}
Martin Beseda
committed
/**
 * Registers another error function together with its coefficient.
 *
 * The summand and coefficient containers are allocated on first use. The pointer
 * is stored even when it is null; for a non-null function the dimension of the
 * sum is raised to that function's dimension if it is larger.
 *
 * @param F     Error function to add (may be null)
 * @param alpha Coefficient the function is multiplied by in the sum
 */
void ErrorSum::add_error_function(ErrorFunction* F, double alpha) {
    if (!this->summand) {
        this->summand = new std::vector<ErrorFunction*>(0);
    }
    this->summand->push_back(F);

    if (!this->summand_coefficient) {
        this->summand_coefficient = new std::vector<double>(0);
    }
    this->summand_coefficient->push_back(alpha);

    if (F) {
        if (F->get_dimension() > this->dimension) {
            this->dimension = F->get_dimension();
        }
    }
}
size_t ErrorSum::get_dimension() {
// if(!this->dimension) {
// size_t max = 0;
// for(auto e : *this->summand) {
// if(e->get_dimension() > max) {
// max = e->get_dimension();
// }
// };
//
// this->dimension = max;
// }
Martin Beseda
committed
return this->dimension;
}
Martin Beseda
committed
std::vector<double>* ErrorSum::get_parameters() {
return this->summand->at(0)->get_parameters();
}

Michal Kravcenko
committed
Martin Beseda
committed
/**
 * Returns the data set reported by the first registered error function.
 *
 * @return Result of `get_dataset()` called on the summand at index 0
 */
DataSet* ErrorSum::get_dataset() {
    ErrorFunction* first_summand = this->summand->at(0);
    return first_summand->get_dataset();
}