// // Created by martin on 7/15/18. // #include <vector> #include <cmath> #include <sstream> #include <boost/random/mersenne_twister.hpp> #include <boost/random/uniform_int_distribution.hpp> #include "ErrorFunctions.h" #include "exceptions.h" #include "message.h" namespace lib4neuro { size_t ErrorFunction::get_dimension() { return this->dimension; } NeuralNetwork* ErrorFunction::get_network_instance() { return this->net; } void ErrorFunction::divide_data_train_test(double percent_test) { size_t ds_size = this->ds->get_n_elements(); /* Store the full data set */ this->ds_full = this->ds; /* Choose random subset of the DataSet for training and the remaining part for validation */ boost::random::mt19937 gen; boost::random::uniform_int_distribution<> dist(0, ds_size - 1); size_t test_set_size = ceil(ds_size * percent_test); std::vector<unsigned int> test_indices; test_indices.reserve(test_set_size); for (unsigned int i = 0; i < test_set_size; i++) { test_indices.emplace_back(dist(gen)); } std::sort(test_indices.begin(), test_indices.end(), std::greater<unsigned int>()); std::vector<std::pair<std::vector<double>, std::vector<double>>> test_data, train_data; /* Copy all the data to train_data */ for (auto e : *this->ds_full->get_data()) { train_data.emplace_back(e); } /* Move the testing data from train_data to test_data */ for (auto ind : test_indices) { test_data.emplace_back(train_data.at(ind)); train_data.erase(train_data.begin() + ind); } /* Re-initialize data set for training */ this->ds = new DataSet(&train_data, this->ds_full->get_normalization_strategy()); /* Initialize test data */ this->ds_test = new DataSet(&test_data, this->ds_full->get_normalization_strategy()); } void ErrorFunction::return_full_data_set_for_training() { if (this->ds_test) { this->ds = this->ds_full; } } DataSet* ErrorFunction::get_dataset() { return this->ds; } DataSet* ErrorFunction::get_test_dataset() { return this->ds_test; } std::vector<double>* ErrorFunction::get_parameters() { std::vector<double>* output = new std::vector<double>(this->net->get_n_weights() + this->net->get_n_biases()); size_t i = 0; for (auto el: *this->net->get_parameter_ptr_weights()) { output->at(i) = el; ++i; } for (auto el: *this->net->get_parameter_ptr_biases()) { output->at(i) = el; ++i; } return output; } MSE::MSE(NeuralNetwork* net, DataSet* ds) { this->net = net; this->ds = ds; this->dimension = net->get_n_weights() + net->get_n_biases(); } double MSE::eval_on_single_input(std::vector<double>* input, std::vector<double>* output, std::vector<double>* weights) { std::vector<double> predicted_output(this->get_network_instance()->get_n_outputs()); this->net->eval_single(*input, predicted_output, weights); double result = 0; double val; for(size_t i = 0; i < output->size(); i++) { val = output->at(i) - predicted_output.at(i); result += val*val; } return std::sqrt(result); } double MSE::eval_on_data_set(lib4neuro::DataSet* data_set, std::ofstream* results_file_path, std::vector<double>* weights, bool denormalize_data, bool verbose) { size_t dim_in = data_set->get_input_dim(); size_t dim_out = data_set->get_output_dim(); double error = 0.0, val, output_norm = 0; std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data(); size_t n_elements = data->size(); //TODO instead use something smarter std::vector<std::vector<double>> outputs(data->size()); std::vector<double> output(dim_out); if (verbose) { COUT_DEBUG("Evaluation of the error function MSE on the given data-set" << std::endl); COUT_DEBUG(R_ALIGN << "[Element index]" << " " << R_ALIGN << "[Input]" << " " << R_ALIGN << "[Real output]" << " " << R_ALIGN << "[Predicted output]" << " " << R_ALIGN << "[Absolute error]" << " " << R_ALIGN << "[Relative error %]" << std::endl); } if (results_file_path) { *results_file_path << R_ALIGN << "[Element index]" << " " << R_ALIGN << "[Input]" << " " << R_ALIGN << "[Real output]" << " " << R_ALIGN << "[Predicted output]" << " " << R_ALIGN << "[Abs. error]" << " " << R_ALIGN << "[Rel. error %]" << std::endl; } for (auto i = 0; i < data->size(); i++) { // Iterate through every element in the test set /* Compute the net output and store it into 'output' variable */ this->net->eval_single(data->at(i).first, output, weights); outputs.at(i) = output; } double denormalized_output; double denormalized_real_input; double denormalized_real_output; // bool denormalize_output = false; // if (denormalize_data) { // if(data_set->is_normalized()) { // data_set->de_normalize(); // } // denormalize_output = true; // } for (auto i = 0; i < data->size(); i++) { /* Compute difference for every element of the output vector */ #ifdef L4N_DEBUG std::stringstream ss_input; std::string separator = ""; for (auto j = 0; j < dim_in; j++) { if(denormalize_data) { denormalized_real_input = data_set->get_normalization_strategy()->de_normalize(data->at(i).first.at(j)); } else { denormalized_real_input = data->at(i).first.at(j); } ss_input << separator << denormalized_real_input; separator = ","; } if(denormalize_data) { denormalized_real_input = data_set->get_normalization_strategy()->de_normalize(data->at(i).first.back()); } else { denormalized_real_input = data->at(i).first.back(); } std::stringstream ss_real_output; std::stringstream ss_predicted_output; #endif double loc_error = 0; output_norm = 0; separator = ""; for (size_t j = 0; j < dim_out; ++j) { if (denormalize_data) { denormalized_real_output = data_set->get_normalization_strategy()->de_normalize(data->at(i).second.at(j)); denormalized_output = data_set->get_normalization_strategy()->de_normalize(outputs.at(i).at(j)); } else { denormalized_real_output = data->at(i).second.at(j); denormalized_output = outputs.at(i).at(j); } #ifdef L4N_DEBUG ss_real_output << separator << denormalized_real_output; ss_predicted_output << separator << denormalized_output; separator = ","; #endif val = denormalized_output - denormalized_real_output; loc_error += val * val; error += loc_error; output_norm += denormalized_output * denormalized_output; } #ifdef L4N_DEBUG std::stringstream ss_ind; ss_ind << "[" << i << "]"; if (verbose) { COUT_DEBUG(R_ALIGN << ss_ind.str() << " " << R_ALIGN << ss_input.str() << " " << R_ALIGN << ss_real_output.str() << " " << R_ALIGN << ss_predicted_output.str() << " " << R_ALIGN << std::sqrt(loc_error) << " " << R_ALIGN << 200.0 * std::sqrt(loc_error) / (std::sqrt(loc_error) + std::sqrt(output_norm)) << std::endl); } if (results_file_path) { *results_file_path << R_ALIGN << ss_ind.str() << " " << R_ALIGN << ss_input.str() << " " << R_ALIGN << ss_real_output.str() << " " << R_ALIGN << ss_predicted_output.str() << " " << R_ALIGN << std::sqrt(loc_error) << " " << R_ALIGN << 200.0 * std::sqrt(loc_error) / (std::sqrt(loc_error) + std::sqrt(output_norm)) << std::endl; } #endif } double result = std::sqrt(error) / n_elements; if (verbose) { COUT_DEBUG("MSE = " << result << std::endl); } if (results_file_path) { *results_file_path << "MSE = " << result << std::endl; } return result; } double MSE::eval_on_data_set(DataSet* data_set, std::string results_file_path, std::vector<double>* weights, bool verbose) { std::ofstream ofs(results_file_path); if (ofs.is_open()) { return this->eval_on_data_set(data_set, &ofs, weights, true, verbose); ofs.close(); } else { THROW_RUNTIME_ERROR("File " + results_file_path + " couldn't be open!"); } } double MSE::eval_on_data_set(DataSet* data_set, std::vector<double>* weights, bool verbose) { return this->eval_on_data_set(data_set, nullptr, weights, true, verbose); } double MSE::eval(std::vector<double>* weights, bool denormalize_data, bool verbose) { return this->eval_on_data_set(this->ds, nullptr, weights, denormalize_data, verbose); } double MSE::eval_on_test_data(std::vector<double>* weights, bool verbose) { return this->eval_on_data_set(this->ds_test, weights, verbose); } double MSE::eval_on_test_data(std::string results_file_path, std::vector<double>* weights, bool verbose) { return this->eval_on_data_set(this->ds_test, results_file_path, weights, verbose); } double MSE::eval_on_test_data(std::ofstream* results_file_path, std::vector<double>* weights, bool verbose) { return this->eval_on_data_set(this->ds_test, results_file_path, weights, true, verbose); } void MSE::calculate_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) { size_t dim_out = this->ds->get_output_dim(); size_t n_elements = this->ds->get_n_elements(); std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data(); if (batch > 0) { *data = this->ds->get_random_data_batch(batch); n_elements = data->size(); } std::vector<double> error_derivative(dim_out); for (auto el: *data) { // Iterate through every element in the test set this->net->eval_single(el.first, error_derivative, ¶ms); // Compute the net output and store it into 'output' variable for (size_t j = 0; j < dim_out; ++j) { error_derivative[j] = 2.0 * (error_derivative[j] - el.second[j]); //real - expected result } this->net->add_to_gradient_single(el.first, error_derivative, alpha / n_elements, grad); } } double MSE::calculate_single_residual(std::vector<double>* input, std::vector<double>* output, std::vector<double>* parameters) { //TODO maybe move to the general ErrorFunction //TODO check input vector sizes - they HAVE TO be allocated before calling this function return -this->eval_on_single_input(input, output, parameters); } void MSE::calculate_residual_gradient(std::vector<double>* input, std::vector<double>* output, std::vector<double>* gradient, double h) { //TODO check input vector sizes - they HAVE TO be allocated before calling this function size_t n_parameters = this->get_dimension(); std::vector<double>* parameters = this->get_parameters(); double delta; // Complete step size double former_parameter_value; double f_val1; // f(x + delta) double f_val2; // f(x - delta) for (size_t i = 0; i < n_parameters; i++) { delta = h * (1 + std::abs(parameters->at(i))); former_parameter_value = parameters->at(i); if(delta != 0) { /* Computation of f_val1 = f(x + delta) */ parameters->at(i) = former_parameter_value + delta; f_val1 = this->calculate_single_residual(input, output, parameters); /* Computation of f_val2 = f(x - delta) */ parameters->at(i) = former_parameter_value - delta; f_val2 = this->calculate_single_residual(input, output, parameters); gradient->at(i) = (f_val1 - f_val2) / (2*delta); } /* Restore parameter to the former value */ parameters->at(i) = former_parameter_value; } } void MSE::calculate_error_gradient_single(std::vector<double> &error_vector, std::vector<double> &gradient_vector) { std::fill(gradient_vector.begin(), gradient_vector.end(), 0); std::vector<double> dummy_input; this->net->add_to_gradient_single( dummy_input, error_vector, 1.0, gradient_vector); } void MSE::analyze_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) { size_t dim_out = this->ds->get_output_dim(); size_t n_elements = this->ds->get_n_elements(); std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data(); if (batch > 0) { *data = this->ds->get_random_data_batch(batch); n_elements = data->size(); } std::vector<double> error_derivative(dim_out); std::vector<double> grad_sum(grad.size()); std::fill(grad_sum.begin(), grad_sum.end(), 0.0); this->net->write_weights(); this->net->write_biases(); for (auto el: *data) { // Iterate through every element in the test set this->net->eval_single_debug(el.first, error_derivative, ¶ms); // Compute the net output and store it into 'output' variable std::cout << "Input["; for( auto v: el.first){ std::cout << v << ", "; } std::cout << "]"; std::cout << " Desired Output["; for( auto v: el.second){ std::cout << v << ", "; } std::cout << "]"; std::cout << " Real Output["; for( auto v: error_derivative){ std::cout << v << ", "; } std::cout << "]"; for (size_t j = 0; j < dim_out; ++j) { error_derivative[j] = 2.0 * (error_derivative[j] - el.second[j]); //real - expected result } std::cout << " Error derivative["; for( auto v: error_derivative){ std::cout << v << ", "; } std::cout << "]"; std::fill( grad.begin(), grad.end(), 0.0); this->net->add_to_gradient_single_debug(el.first, error_derivative, 1.0, grad); for(size_t i = 0; i < grad.size(); ++i){ grad_sum[i] += grad[i]; } std::cout << " Gradient["; for( auto v: grad){ std::cout << v << ", "; } std::cout << "]"; std::cout << std::endl; } std::cout << " Total gradient["; for( auto v: grad_sum){ std::cout << v << ", "; } std::cout << "]" << std::endl << std::endl; } double MSE::eval_single_item_by_idx(size_t i, std::vector<double> *parameter_vector, std::vector<double> &error_vector) { double output = 0, val; this->net->eval_single(this->get_dataset()->get_data()->at(i).first, error_vector, parameter_vector); for (size_t j = 0; j < error_vector.size(); ++j) { // Compute difference for every element of the output vector val = error_vector.at(j) - this->get_dataset()->get_data()->at(i).second.at(j); output += val * val; } for (size_t j = 0; j < error_vector.size(); ++j) { error_vector[j] = 2.0 * (error_vector[j] - this->get_dataset()->get_data()->at(i).second[j]); //real - expected result } return sqrt(output); } ErrorSum::ErrorSum() { this->summand = nullptr; this->summand_coefficient = nullptr; this->dimension = 0; } ErrorSum::~ErrorSum() { if (this->summand) { delete this->summand; } if (this->summand_coefficient) { delete this->summand_coefficient; } } double ErrorSum::eval_on_test_data(std::vector<double>* weights, bool verbose) { //TODO take care of the case, when there are no test data double output = 0.0; ErrorFunction* ef = nullptr; for (unsigned int i = 0; i < this->summand->size(); ++i) { ef = this->summand->at(i); if (ef) { output += ef->eval_on_test_data(weights) * this->summand_coefficient->at(i); } } return output; } double ErrorSum::eval_on_test_data(std::string results_file_path, std::vector<double>* weights, bool verbose) { THROW_NOT_IMPLEMENTED_ERROR(); return -1; } double ErrorSum::eval_on_test_data(std::ofstream* results_file_path, std::vector<double>* weights, bool verbose) { THROW_NOT_IMPLEMENTED_ERROR(); return -1; } double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::vector<double>* weights, bool verbose) { THROW_NOT_IMPLEMENTED_ERROR(); return -1; } double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::string results_file_path, std::vector<double>* weights, bool verbose) { THROW_NOT_IMPLEMENTED_ERROR(); return -1; } double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::ofstream* results_file_path, std::vector<double>* weights, bool denormalize_data, bool verbose) { THROW_NOT_IMPLEMENTED_ERROR(); return -1; } double ErrorSum::eval(std::vector<double>* weights, bool denormalize_data, bool verbose) { double output = 0.0; ErrorFunction* ef = nullptr; for (unsigned int i = 0; i < this->summand->size(); ++i) { ef = this->summand->at(i); if (ef) { output += ef->eval(weights) * this->summand_coefficient->at(i); } } return output; } double ErrorSum::eval_single_item_by_idx(size_t i, std::vector<double> *parameter_vector, std::vector<double> &error_vector) { double output = 0.0; ErrorFunction* ef = nullptr; std::fill(error_vector.begin(), error_vector.end(), 0); std::vector<double> error_vector_mem(error_vector.size()); for (size_t j = 0; j < this->summand->size(); ++j) { ef = this->summand->at(i); if (ef) { output += ef->eval_single_item_by_idx(i, parameter_vector, error_vector_mem) * this->summand_coefficient->at(j); for( size_t k = 0; k < error_vector_mem.size(); ++k){ error_vector[k] += error_vector_mem[k] * this->summand_coefficient->at(j); } } } return output; } void ErrorSum::calculate_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) { ErrorFunction* ef = nullptr; for (size_t i = 0; i < this->summand->size(); ++i) { ef = this->summand->at(i); if (ef) { ef->calculate_error_gradient(params, grad, this->summand_coefficient->at(i) * alpha, batch); } } } void ErrorSum::calculate_error_gradient_single(std::vector<double> &error_vector, std::vector<double> &gradient_vector) { COUT_INFO("ErrorSum::calculate_error_gradient_single NOT YET IMPLEMENTED!!!"); } void ErrorSum::analyze_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) { ErrorFunction* ef = nullptr; for (size_t i = 0; i < this->summand->size(); ++i) { ef = this->summand->at(i); if (ef) { ef->calculate_error_gradient(params, grad, this->summand_coefficient->at(i) * alpha, batch); } } } void ErrorSum::add_error_function(ErrorFunction* F, double alpha) { if (!this->summand) { this->summand = new std::vector<ErrorFunction*>(0); } this->summand->push_back(F); if (!this->summand_coefficient) { this->summand_coefficient = new std::vector<double>(0); } this->summand_coefficient->push_back(alpha); if (F) { if (F->get_dimension() > this->dimension) { this->dimension = F->get_dimension(); } } } size_t ErrorSum::get_dimension() { // if(!this->dimension) { // size_t max = 0; // for(auto e : *this->summand) { // if(e->get_dimension() > max) { // max = e->get_dimension(); // } // }; // // this->dimension = max; // } return this->dimension; } std::vector<double>* ErrorSum::get_parameters() { return this->summand->at(0)->get_parameters(); } DataSet* ErrorSum::get_dataset() { return this->summand->at(0)->get_dataset(); }; void ErrorSum::calculate_residual_gradient(std::vector<double>* input, std::vector<double>* output, std::vector<double>* gradient, double h) { THROW_NOT_IMPLEMENTED_ERROR(); } double ErrorSum::calculate_single_residual(std::vector<double>* input, std::vector<double>* output, std::vector<double>* parameters) { THROW_NOT_IMPLEMENTED_ERROR(); return 0; } }