//
// Created by martin on 7/15/18.
//
#include <algorithm>
#include <cmath>
#include <fstream>
#include <functional>
#include <sstream>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>

#include "ErrorFunctions.h"
#include "exceptions.h"
#include "message.h"
namespace lib4neuro {

    size_t ErrorFunction::get_dimension() {
        return this->dimension;
    }

    NeuralNetwork* ErrorFunction::get_network_instance() {
        return this->net;
    }

    void ErrorFunction::divide_data_train_test(double percent_test) {
        size_t ds_size = this->ds->get_n_elements();

        /* Store the full data set */
        this->ds_full = this->ds;

        /* Choose a random subset of the DataSet for testing and keep the remaining part for training */
        boost::random::mt19937 gen;
        boost::random::uniform_int_distribution<> dist(0, ds_size - 1);

        size_t test_set_size = ceil(ds_size * percent_test);

        std::vector<unsigned int> test_indices;
        test_indices.reserve(test_set_size);
        for (unsigned int i = 0; i < test_set_size; i++) {
            test_indices.emplace_back(dist(gen));
        }
        std::sort(test_indices.begin(), test_indices.end(), std::greater<unsigned int>());

        std::vector<std::pair<std::vector<double>, std::vector<double>>> test_data, train_data;

        /* Copy all the data to train_data */
        for (auto e : *this->ds_full->get_data()) {
            train_data.emplace_back(e);
        }

        /* Move the testing data from train_data to test_data */
        for (auto ind : test_indices) {
            test_data.emplace_back(train_data.at(ind));
            train_data.erase(train_data.begin() + ind);
        }

        /* Re-initialize data set for training */
        this->ds = new DataSet(&train_data, this->ds_full->get_normalization_strategy());

        /* Initialize data set for testing */
        this->ds_test = new DataSet(&test_data, this->ds_full->get_normalization_strategy());
    }

    void ErrorFunction::return_full_data_set_for_training() {
        if (this->ds_test) {
            this->ds = this->ds_full;
        }
    }
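
    /* Usage sketch (illustrative only, not part of the library): splitting the data and
     * evaluating on both parts. Assumes an already-constructed NeuralNetwork 'net' and
     * DataSet 'ds'; passing nullptr as the weights is assumed here to mean "use the
     * network's current weights".
     *
     *   lib4neuro::MSE mse(&net, &ds);
     *   mse.divide_data_train_test(0.2);                  // hold out roughly 20 % for testing
     *   double train_err = mse.eval(nullptr);             // error on the training part
     *   double test_err  = mse.eval_on_test_data(nullptr);
     *   mse.return_full_data_set_for_training();          // restore the full data set
     */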

    DataSet* ErrorFunction::get_dataset() {
        return this->ds;
    }

    DataSet* ErrorFunction::get_test_dataset() {
        return this->ds_test;
    }

    std::vector<double>* ErrorFunction::get_parameters() {
        std::vector<double>* output = new std::vector<double>(this->net->get_n_weights() + this->net->get_n_biases());

        size_t i = 0;
        for (auto el : *this->net->get_parameter_ptr_weights()) {
            output->at(i) = el;
            ++i;
        }
        for (auto el : *this->net->get_parameter_ptr_biases()) {
            output->at(i) = el;
            ++i;
        }

        return output;
    }

    MSE::MSE(NeuralNetwork* net, DataSet* ds) {
        this->net = net;
        this->ds = ds;
        this->dimension = net->get_n_weights() + net->get_n_biases();
    }

    double MSE::eval_on_data_set(lib4neuro::DataSet* data_set, std::ofstream* results_file_path,
                                 std::vector<double>* weights) {
        //TODO do NOT duplicate code - rewrite the function in a better way
        size_t dim_in = data_set->get_input_dim();
        size_t dim_out = data_set->get_output_dim();
        double error = 0.0, val, output_norm = 0;

        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data();
        size_t n_elements = data->size();

        //TODO instead use something smarter
        std::vector<std::vector<double>> outputs(data->size());
        std::vector<double> output(dim_out);

        COUT_DEBUG("Evaluation of the error function MSE on the given data-set" << std::endl);
        COUT_DEBUG(R_ALIGN << "[Element index]" << " "
                           << R_ALIGN << "[Input]" << " "
                           << R_ALIGN << "[Real output]" << " "
                           << R_ALIGN << "[Predicted output]" << " "
                           << R_ALIGN << "[Absolute error]" << " "
                           << R_ALIGN << "[Relative error %]"
                           << std::endl);

        *results_file_path << R_ALIGN << "[Element index]" << " "
                           << R_ALIGN << "[Input]" << " "
                           << R_ALIGN << "[Real output]" << " "
                           << R_ALIGN << "[Predicted output]" << " "
                           << R_ALIGN << "[Abs. error]" << " "
                           << R_ALIGN << "[Rel. error %]"
                           << std::endl;

        /* Compute predicted outputs for the whole data set */
        for (size_t i = 0; i < data->size(); i++) {
            /* Compute the net output and store it into the 'output' variable */
            this->net->eval_single(data->at(i).first,
                                   output,
                                   weights);
            outputs.at(i) = output;
        }

        /* De-normalize the data set, if it is normalized */
        bool denormalize_output = false;
        if (data_set->is_normalized()) {
            data_set->de_normalize();
            denormalize_output = true;
        }

        /* Evaluate the prediction error on the de-normalized data */
        for (size_t i = 0; i < data->size(); i++) {
#ifdef L4N_DEBUG
            std::stringstream ss_input;
            for (size_t j = 0; j + 1 < dim_in; j++) {
                ss_input << data->at(i).first.at(j) << ",";
            }
            ss_input << data->at(i).first.back();

            std::stringstream ss_real_output;
            std::stringstream ss_predicted_output;
#endif

            /* Compute the difference for every element of the output vector */
            double denormalized_output;
            double loc_error = 0;
            output_norm = 0;
            for (size_t j = 0; j < dim_out; ++j) {
                if (denormalize_output) {
                    denormalized_output = data_set->get_normalization_strategy()->de_normalize(outputs.at(i).at(j));
                } else {
                    denormalized_output = outputs.at(i).at(j);
                }

#ifdef L4N_DEBUG
                ss_real_output << data->at(i).second.at(j);
                ss_predicted_output << denormalized_output;
#endif

                val = denormalized_output - data->at(i).second.at(j);
                loc_error += val * val;

                output_norm += denormalized_output * denormalized_output;
            }

            /* Add the squared error of this element once, after the inner loop finishes */
            error += loc_error;

#ifdef L4N_DEBUG
            std::stringstream ss_ind;
            ss_ind << "[" << i << "]";

            COUT_DEBUG(R_ALIGN << ss_ind.str() << " "
                               << R_ALIGN << ss_input.str() << " "
                               << R_ALIGN << ss_real_output.str() << " "
                               << R_ALIGN << ss_predicted_output.str() << " "
                               << R_ALIGN << std::sqrt(loc_error) << " "
                               << R_ALIGN << 200.0 * std::sqrt(loc_error) / (std::sqrt(loc_error) + std::sqrt(output_norm))
                               << std::endl);

            *results_file_path << R_ALIGN << ss_ind.str() << " "
                               << R_ALIGN << ss_input.str() << " "
                               << R_ALIGN << ss_real_output.str() << " "
                               << R_ALIGN << ss_predicted_output.str() << " "
                               << R_ALIGN << std::sqrt(loc_error) << " "
                               << R_ALIGN << 200.0 * std::sqrt(loc_error) / (std::sqrt(loc_error) + std::sqrt(output_norm))
                               << std::endl;
#endif
        }

        double result = std::sqrt(error) / n_elements;

        COUT_DEBUG("MSE = " << result << std::endl);
        *results_file_path << "MSE = " << result << std::endl;

        return result;
    }
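
    /* Note: every eval_* overload above and below reports sqrt(sum of squared errors) / n_elements,
     * i.e. the library's "MSE" convention, not the textbook mean of squared errors (SSE / n). */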

    double MSE::eval_on_data_set(DataSet* data_set, std::string results_file_path, std::vector<double>* weights) {
        //TODO do NOT duplicate code - rewrite the function in a better way
        size_t dim_out = data_set->get_output_dim();
        size_t n_elements = data_set->get_n_elements();
        double error = 0.0, val;

        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data();

        //TODO instead use something smarter
        std::vector<std::vector<double>> outputs(data->size());
        std::vector<double> output(dim_out);

        COUT_DEBUG("Evaluation of the error function MSE on the given data-set" << std::endl);
        COUT_DEBUG(R_ALIGN << "[Input]" << " "
                           << R_ALIGN << "[Real output]" << " "
                           << R_ALIGN << "[Predicted output]" << " "
                           << std::endl);

        std::ofstream ofs(results_file_path);
        if (!ofs.is_open()) {
            THROW_RUNTIME_ERROR("File path: " + results_file_path + " was not successfully opened!");
        }

        ofs << R_ALIGN << "[Index]" << " "
            << R_ALIGN << "[Input]" << " "
            << R_ALIGN << "[Real output]" << " "
            << R_ALIGN << "[Predicted output]"
            << std::endl;

        /* Compute predicted outputs for the whole data set */
        for (size_t i = 0; i < data->size(); i++) {
            /* Compute the net output and store it into the 'output' variable */
            this->net->eval_single(data->at(i).first,
                                   output,
                                   weights);
            outputs.at(i) = output;
        }

        /* De-normalize the data set, if it is normalized */
        bool denormalize_output = false;
        if (data_set->is_normalized()) {
            data_set->de_normalize();
            denormalize_output = true;
        }

        /* Evaluate the prediction error on the de-normalized data */
        for (size_t i = 0; i < data->size(); i++) {
            /* Compute the difference for every element of the output vector */
            double denormalized_output;
            for (size_t j = 0; j < dim_out; ++j) {
                if (denormalize_output) {
                    denormalized_output = data_set->get_normalization_strategy()->de_normalize(outputs.at(i).at(j));
                } else {
                    denormalized_output = outputs.at(i).at(j);
                }

                std::stringstream ss_ind;
                ss_ind << "[" << i << "]";

                COUT_DEBUG(R_ALIGN << ss_ind.str() << " "
                                   << R_ALIGN << data->at(i).first.at(j) << " "
                                   << R_ALIGN << data->at(i).second.at(j) << " "
                                   << R_ALIGN << denormalized_output
                                   << std::endl);

                ofs << R_ALIGN << ss_ind.str() << " "
                    << R_ALIGN << data->at(i).first.at(j) << " "
                    << R_ALIGN << data->at(i).second.at(j) << " "
                    << R_ALIGN << denormalized_output
                    << std::endl;

                val = denormalized_output - data->at(i).second.at(j);
                error += val * val;
            }
            ofs << std::endl;
        }

        double result = std::sqrt(error) / n_elements;

        ofs << "MSE = " << result << std::endl;
        ofs.close();

        COUT_DEBUG("MSE = " << result << std::endl);

        return result;
    }

    double MSE::eval_on_data_set(DataSet* data_set, std::vector<double>* weights) {
        size_t dim_out = data_set->get_output_dim();
        size_t n_elements = data_set->get_n_elements();
        double error = 0.0, val;

        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = data_set->get_data();

        //TODO instead use something smarter
        std::vector<std::vector<double>> outputs(data->size());
        std::vector<double> output(dim_out);

        COUT_DEBUG("Evaluation of the error function MSE on the given data-set" << std::endl);
        COUT_DEBUG(R_ALIGN << "[Input]" << " "
                           << R_ALIGN << "[Real output]" << " "
                           << R_ALIGN << "[Predicted output]" << " "
                           << std::endl);

        /* Compute predicted outputs */
        for (size_t i = 0; i < data->size(); i++) {
            /* Compute the net output and store it into the 'output' variable */
            this->net->eval_single(data->at(i).first,
                                   output,
                                   weights);
            outputs.at(i) = output;
        }

        /* De-normalize the data set, if it is normalized */
        bool denormalize_output = false;
        if (data_set->is_normalized()) {
            data_set->de_normalize();
            denormalize_output = true;
        }

        /* Evaluate the prediction error on de-normalized data */
        for (size_t i = 0; i < data->size(); i++) {
            /* Compute the difference for every element of the output vector */
            double denormalized_output;
            for (size_t j = 0; j < dim_out; ++j) {
                if (denormalize_output) {
                    denormalized_output = data_set->get_normalization_strategy()->de_normalize(outputs.at(i).at(j));
                } else {
                    denormalized_output = outputs.at(i).at(j);
                }

                std::stringstream ss_ind;
                ss_ind << "[" << i << "]";

                COUT_DEBUG(R_ALIGN << ss_ind.str() << " "
                                   << R_ALIGN << data->at(i).first.at(j) << " "
                                   << R_ALIGN << data->at(i).second.at(j) << " "
                                   << R_ALIGN << denormalized_output
                                   << std::endl);

                val = denormalized_output - data->at(i).second.at(j);
                error += val * val;
            }
        }

        double result = std::sqrt(error) / n_elements;

        COUT_DEBUG("MSE = " << result << std::endl);

        return result;
    }

    double MSE::eval(std::vector<double>* weights) {
        size_t dim_out = this->ds->get_output_dim();
//        unsigned int dim_in = this->ds->get_input_dim();
        size_t n_elements = this->ds->get_n_elements();
        double error = 0.0, val;

        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data();

//        //TODO instead use something smarter
//        this->net->copy_weights(weights);

        std::vector<double> output(dim_out);

        for (auto el : *data) {  // Iterate through every element in the data set
            this->net->eval_single(el.first, output,
                                   weights);  // Compute the net output and store it into 'output' variable

            for (size_t j = 0; j < dim_out; ++j) {  // Compute difference for every element of the output vector
                val = output.at(j) - el.second.at(j);
                error += val * val;
            }
        }

        return sqrt(error) / n_elements;
    }

    double MSE::eval_on_test_data(std::vector<double>* weights) {
        return this->eval_on_data_set(this->ds_test, weights);
    }

    double MSE::eval_on_test_data(std::string results_file_path, std::vector<double>* weights) {
        return this->eval_on_data_set(this->ds_test, results_file_path, weights);
    }

    double MSE::eval_on_test_data(std::ofstream* results_file_path, std::vector<double>* weights) {
        return this->eval_on_data_set(this->ds_test, results_file_path, weights);
    }

    void
    MSE::calculate_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) {
        size_t dim_out = this->ds->get_output_dim();
        size_t n_elements = this->ds->get_n_elements();

        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data();

        /* Use a random batch instead of the full data set, if requested; keep the batch in a
         * local copy so the data set's own storage is not overwritten */
        std::vector<std::pair<std::vector<double>, std::vector<double>>> batch_data;
        if (batch > 0) {
            batch_data = this->ds->get_random_data_batch(batch);
            data = &batch_data;
            n_elements = data->size();
        }

        std::vector<double> error_derivative(dim_out);

        for (auto el : *data) {  // Iterate through every element in the batch
            /* Compute the net output and store it into 'error_derivative' */
            this->net->eval_single(el.first, error_derivative,
                                   &params);

            for (size_t j = 0; j < dim_out; ++j) {
                error_derivative[j] = 2.0 * (error_derivative[j] - el.second[j]);  // real - expected result
            }

            this->net->add_to_gradient_single(el.first, error_derivative, alpha / n_elements, grad);
        }
    }
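
    /* The accumulation above implements d/dw (net(x) - y)^2 = 2 * (net(x) - y) * d net(x)/dw:
     * 'error_derivative' holds the factor 2 * (net(x) - y) and add_to_gradient_single
     * back-propagates it, scaled by alpha / n_elements to average over the batch. */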

    void MSE::calculate_error_gradient_single(std::vector<double>& error_vector,
                                              std::vector<double>& gradient_vector) {
        std::fill(gradient_vector.begin(), gradient_vector.end(), 0);

        std::vector<double> dummy_input;
        this->net->add_to_gradient_single(dummy_input, error_vector, 1.0, gradient_vector);
    }
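
    /* In contrast to calculate_error_gradient, the caller supplies the per-output error
     * derivatives directly in 'error_vector'; the network back-propagates them once with
     * unit weight into 'gradient_vector'. */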

    void
    MSE::analyze_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha, size_t batch) {
        size_t dim_out = this->ds->get_output_dim();
        size_t n_elements = this->ds->get_n_elements();

        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = this->ds->get_data();

        /* Use a random batch instead of the full data set, if requested (local copy, see calculate_error_gradient) */
        std::vector<std::pair<std::vector<double>, std::vector<double>>> batch_data;
        if (batch > 0) {
            batch_data = this->ds->get_random_data_batch(batch);
            data = &batch_data;
            n_elements = data->size();
        }

        std::vector<double> error_derivative(dim_out);
        std::vector<double> grad_sum(grad.size());
        std::fill(grad_sum.begin(), grad_sum.end(), 0.0);
        this->net->write_weights();
        this->net->write_biases();

        for (auto el : *data) {  // Iterate through every element in the batch
            /* Compute the net output and store it into 'error_derivative' */
            this->net->eval_single_debug(el.first, error_derivative,
                                         &params);

            std::cout << "Input[";
            for (auto v : el.first) {
                std::cout << v << ", ";
            }
            std::cout << "]";

            std::cout << " Desired Output[";
            for (auto v : el.second) {
                std::cout << v << ", ";
            }
            std::cout << "]";

            std::cout << " Real Output[";
            for (auto v : error_derivative) {
                std::cout << v << ", ";
            }
            std::cout << "]";

            for (size_t j = 0; j < dim_out; ++j) {
                error_derivative[j] = 2.0 * (error_derivative[j] - el.second[j]);  // real - expected result
            }

            std::cout << " Error derivative[";
            for (auto v : error_derivative) {
                std::cout << v << ", ";
            }
            std::cout << "]";

            std::fill(grad.begin(), grad.end(), 0.0);
            this->net->add_to_gradient_single_debug(el.first, error_derivative, 1.0, grad);
            for (size_t i = 0; i < grad.size(); ++i) {
                grad_sum[i] += grad[i];
            }

            std::cout << " Gradient[";
            for (auto v : grad) {
                std::cout << v << ", ";
            }
            std::cout << "]";

            std::cout << std::endl;
        }

        std::cout << " Total gradient[";
        for (auto v : grad_sum) {
            std::cout << v << ", ";
        }
        std::cout << "]" << std::endl << std::endl;
    }

    double MSE::eval_single_item_by_idx(size_t i, std::vector<double>* parameter_vector,
                                        std::vector<double>& error_vector) {
        double output = 0, val;

        this->net->eval_single(this->get_dataset()->get_data()->at(i).first, error_vector, parameter_vector);

        for (size_t j = 0; j < error_vector.size(); ++j) {  // Compute difference for every element of the output vector
            val = error_vector.at(j) - this->get_dataset()->get_data()->at(i).second.at(j);
            output += val * val;
        }

        for (size_t j = 0; j < error_vector.size(); ++j) {
            error_vector[j] =
                2.0 * (error_vector[j] - this->get_dataset()->get_data()->at(i).second[j]);  // real - expected result
        }

        return sqrt(output);
    }

    ErrorSum::ErrorSum() {
        this->summand = nullptr;
        this->summand_coefficient = nullptr;
        this->dimension = 0;
    }

    ErrorSum::~ErrorSum() {
        if (this->summand) {
            delete this->summand;
        }

        if (this->summand_coefficient) {
            delete this->summand_coefficient;
        }
    }

    double ErrorSum::eval_on_test_data(std::vector<double>* weights) {
        //TODO take care of the case when there are no test data
        double output = 0.0;
        ErrorFunction* ef = nullptr;

        for (unsigned int i = 0; i < this->summand->size(); ++i) {
            ef = this->summand->at(i);

            if (ef) {
                output += ef->eval_on_test_data(weights) * this->summand_coefficient->at(i);
            }
        }

        return output;
    }

    double ErrorSum::eval_on_test_data(std::string results_file_path, std::vector<double>* weights) {
        THROW_NOT_IMPLEMENTED_ERROR();
        return -1;
    }

    double ErrorSum::eval_on_test_data(std::ofstream* results_file_path, std::vector<double>* weights) {
        THROW_NOT_IMPLEMENTED_ERROR();
        return -1;
    }

    double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::vector<double>* weights) {
        THROW_NOT_IMPLEMENTED_ERROR();
        return -1;
    }

    double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::string results_file_path,
                                      std::vector<double>* weights) {
        THROW_NOT_IMPLEMENTED_ERROR();
        return -1;
    }

    double ErrorSum::eval_on_data_set(lib4neuro::DataSet* data_set, std::ofstream* results_file_path,
                                      std::vector<double>* weights) {
        THROW_NOT_IMPLEMENTED_ERROR();
        return -1;
    }

    double ErrorSum::eval(std::vector<double>* weights) {
        double output = 0.0;
        ErrorFunction* ef = nullptr;

        for (unsigned int i = 0; i < this->summand->size(); ++i) {
            ef = this->summand->at(i);

            if (ef) {
                output += ef->eval(weights) * this->summand_coefficient->at(i);
            }
        }

        return output;
    }

    double ErrorSum::eval_single_item_by_idx(size_t i, std::vector<double>* parameter_vector,
                                             std::vector<double>& error_vector) {
        double output = 0.0;
        ErrorFunction* ef = nullptr;

        std::fill(error_vector.begin(), error_vector.end(), 0);
        std::vector<double> error_vector_mem(error_vector.size());

        for (size_t j = 0; j < this->summand->size(); ++j) {
            ef = this->summand->at(j);  // the j-th summand, not the data-element index 'i'

            if (ef) {
                output += ef->eval_single_item_by_idx(i, parameter_vector, error_vector_mem)
                          * this->summand_coefficient->at(j);

                for (size_t k = 0; k < error_vector_mem.size(); ++k) {
                    error_vector[k] += error_vector_mem[k] * this->summand_coefficient->at(j);
                }
            }
        }

        return output;
    }

    void ErrorSum::calculate_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha,
                                            size_t batch) {
        ErrorFunction* ef = nullptr;
        for (size_t i = 0; i < this->summand->size(); ++i) {
            ef = this->summand->at(i);

            if (ef) {
                ef->calculate_error_gradient(params, grad, this->summand_coefficient->at(i) * alpha, batch);
            }
        }
    }

    void ErrorSum::calculate_error_gradient_single(std::vector<double>& error_vector,
                                                   std::vector<double>& gradient_vector) {
        COUT_INFO("ErrorSum::calculate_error_gradient_single NOT YET IMPLEMENTED!!!");
    }

    void ErrorSum::analyze_error_gradient(std::vector<double>& params, std::vector<double>& grad, double alpha,
                                          size_t batch) {
        ErrorFunction* ef = nullptr;
        for (size_t i = 0; i < this->summand->size(); ++i) {
            ef = this->summand->at(i);

            if (ef) {
                ef->calculate_error_gradient(params, grad, this->summand_coefficient->at(i) * alpha, batch);
            }
        }
    }

    void ErrorSum::add_error_function(ErrorFunction* F, double alpha) {
        if (!this->summand) {
            this->summand = new std::vector<ErrorFunction*>(0);
        }
        this->summand->push_back(F);

        if (!this->summand_coefficient) {
            this->summand_coefficient = new std::vector<double>(0);
        }
        this->summand_coefficient->push_back(alpha);

        if (F) {
            if (F->get_dimension() > this->dimension) {
                this->dimension = F->get_dimension();
            }
        }
    }
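
    /* Usage sketch (illustrative only): combining two error functions into a weighted sum.
     * 'mse_a' and 'mse_b' stand for any previously constructed ErrorFunction instances.
     *
     *   lib4neuro::ErrorSum total_error;
     *   total_error.add_error_function(&mse_a, 1.0);
     *   total_error.add_error_function(&mse_b, 0.5);
     *   double e = total_error.eval(&weights);  // = 1.0 * mse_a.eval(...) + 0.5 * mse_b.eval(...)
     */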

    size_t ErrorSum::get_dimension() {
//        if(!this->dimension) {
//            size_t max = 0;
//            for(auto e : *this->summand) {
//                if(e->get_dimension() > max) {
//                    max = e->get_dimension();
//                }
//            };
//
//            this->dimension = max;
//        }
        return this->dimension;
    }

    std::vector<double>* ErrorSum::get_parameters() {
        return this->summand->at(0)->get_parameters();
    }

    DataSet* ErrorSum::get_dataset() {
        return this->summand->at(0)->get_dataset();
    }

}