// DESolver.cpp (17.9 KiB) — recovered from a GitLab web capture.
// NOTE(review): the capture interleaved page navigation chrome and VCS blame
// artifacts with the code, and appears to have dropped source lines in several
// function bodies; reconcile against the repository before building.
  • /**
     * DESCRIPTION OF THE FILE
     *
     * @author Michal Kravčenko
     * @date 22.7.18 -
     */
    
    
    #include <numeric>
    #include <string>

    #include "message.h"

    #include "exceptions.h"
    
    // (VCS blame artifact from the web capture — author: Michal Kravcenko)
    //TODO add support for multiple unknown functions
    
    
    namespace lib4neuro {
        /**
         * Creates a multi-index of the given dimension with every partial
         * derivative degree initialized to zero.
         *
         * @param dimension number of variables tracked by this multi-index
         */
        MultiIndex::MultiIndex(size_t dimension) {
            this->dim = dimension;
            // assign() sizes and zero-fills in one step; the original
            // resize() + std::fill was redundant, since resize() already
            // value-initializes the newly created elements to 0.
            this->partial_derivatives_degrees.assign(this->dim, 0);
        }
    
        /**
         * Sets the derivative degree recorded for one variable.
         * Bounds-checked: std::out_of_range is thrown when 'index' does not
         * address an existing dimension of this multi-index.
         *
         * @param index which variable's degree to set
         * @param value the new derivative degree
         */
        void MultiIndex::set_partial_derivative(size_t index, size_t value) {
            auto &degree_slot = this->partial_derivatives_degrees.at(index);
            degree_slot = value;
        }
    
        std::vector<size_t> *MultiIndex::get_partial_derivatives_degrees() {
            return &this->partial_derivatives_degrees;
        }
    
        /**
         * Strict weak ordering used as the key comparator in
         * map<MultiIndex, NeuralNetwork*>: shorter dimension sorts first,
         * equal dimensions compare element-wise (lexicographically).
         *
         * Fix: the capture had lost the loop's closing brace and the final
         * 'return false;' for the all-equal case; both are restored here.
         */
        bool MultiIndex::operator<(const MultiIndex &rhs) const {
            if (dim != rhs.dim) {
                return dim < rhs.dim;
            }

            for (size_t i = 0; i < dim; ++i) {
                if (partial_derivatives_degrees[i] != rhs.partial_derivatives_degrees[i]) {
                    return partial_derivatives_degrees[i] < rhs.partial_derivatives_degrees[i];
                }
            }

            /* equal multi-indices are not less-than each other */
            return false;
        }
    
        /**
         * Renders the multi-index as a comma-separated list of degrees,
         * e.g. "0, 2, 1" (no trailing separator).
         *
         * Fixes: restores the closing brace lost in the capture; replaces the
         * sprintf-into-fixed-buffer idiom with std::to_string; and no longer
         * underflows 'this->dim - 1' when dim == 0 (returns "" instead).
         */
        std::string MultiIndex::to_string() const {
            std::string output;

            for (size_t i = 0; i < this->dim; ++i) {
                if (i > 0) {
                    output.append(", ");
                }
                output.append(std::to_string(this->partial_derivatives_degrees[i]));
            }

            return output;
        }
    
        /**
         * @return the total order of the partial derivative, i.e. the sum of
         *         all per-variable degrees.
         *
         * Fix: the 'return output;' and closing brace were lost in the capture;
         * the summation is expressed with std::accumulate.
         */
        size_t MultiIndex::get_degree() const {
            return std::accumulate(this->partial_derivatives_degrees.begin(),
                                   this->partial_derivatives_degrees.end(),
                                   static_cast<size_t>(0));
        }
    
        /**
         * Builds the neural network representing the unknown solution:
         * n_inputs linear input neurons -> m logistic inner neurons -> 1 linear
         * output neuron, fully connected layer to layer. Also prepares one
         * NeuralNetworkSum per equation and the per-equation error bookkeeping.
         *
         * @param n_equations number of differential equations in the system
         * @param n_inputs    dimension of the domain (number of input neurons)
         * @param m           number of inner (logistic) neurons
         *
         * NOTE(review): this capture is missing lines from this constructor —
         * the argument-check's closing brace, the allocation of this->solution
         * (dereferenced below), the definition of 'initial_mi', and several loop
         * closings. Code kept byte-identical; reconcile with the repository.
         */
        DESolver::DESolver(size_t n_equations, size_t n_inputs, size_t m) {

            /* NOTE(review): the guard's closing '}' is missing in this capture */
            if (m <= 0 || n_inputs <= 0 || n_equations <= 0) {

                THROW_INVALID_ARGUMENT_ERROR("Parameters 'm', 'n_equations', 'n_inputs' and 'n_outputs' must be greater than zero!");

            printf("Differential Equation Solver with %d equations\n--------------------------------------------------------------------------\n",
                   (int) n_equations);

            printf("Constructing NN structure representing the solution [%d input neurons][%d inner neurons]...\n",
                   (int) n_inputs, (int) m);

            /* cache the problem dimensions */
            this->dim_i = n_inputs;
            this->dim_inn = m;
            this->n_equations = n_equations;

            /* inner neurons are kept separately so derivative networks can reuse them */
            this->solution_inner_neurons = new std::vector<NeuronLogistic *>(0);
            this->solution_inner_neurons->reserve(m);

            /* input neurons */
            /* NOTE(review): this->solution is used below but its allocation is not
               visible in this capture — presumably 'this->solution = new NeuralNetwork();'
               was dropped; confirm against the repository */
            std::vector<size_t> input_set(this->dim_i);
            size_t idx;
            for (size_t i = 0; i < this->dim_i; ++i) {
                NeuronLinear *input_i = new NeuronLinear();  //f(x) = x
                idx = this->solution->add_neuron(input_i, BIAS_TYPE::NO_BIAS);
                input_set[i] = idx;
            }
            this->solution->specify_input_neurons(input_set);
            size_t first_input_neuron = input_set[0];

            /* output neuron */
            std::vector<size_t> output_set(1);
            idx = this->solution->add_neuron(new NeuronLinear(), BIAS_TYPE::NO_BIAS);//f(x) = x
            output_set[0] = idx;
            this->solution->specify_output_neurons(output_set);
            size_t first_output_neuron = idx;

            /* inner neurons */
            size_t first_inner_neuron = 0;
            for (size_t i = 0; i < this->dim_inn; ++i) {
                NeuronLogistic *inner_i = new NeuronLogistic(); //f(x) = 1.0 / (1.0 + e^(-x))
                this->solution_inner_neurons->push_back(inner_i);
                idx = this->solution->add_neuron(inner_i, BIAS_TYPE::NEXT_BIAS);

                if (i == 0) {
                    first_inner_neuron = idx;
                }

            /* connections between input neurons and inner neurons */
            /* NOTE(review): the previous loop's closing '}' is missing in this capture */
            size_t weight_idx;
            for (size_t i = 0; i < this->dim_i; ++i) {
                for (size_t j = 0; j < this->dim_inn; ++j) {
                    weight_idx = this->solution->add_connection_simple(first_input_neuron + i, first_inner_neuron + j,
                                                                       SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT);

                    printf("  adding a connection between input neuron %2d[%2d] and inner neuron  %2d[%2d], weight index %3d\n",
                           (int) i, (int) (first_input_neuron + i), (int) j, (int) (first_inner_neuron + j),
                           (int) weight_idx);

            /* connections between inner neurons and output neurons */
            /* NOTE(review): the nested loop closings are missing in this capture */
            for (size_t i = 0; i < this->dim_inn; ++i) {
                weight_idx = this->solution->add_connection_simple(first_inner_neuron + i, first_output_neuron,
                                                                   SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT);

                printf("  adding a connection between inner neuron %2d[%2d] and output neuron %2d[%2d], weight index %3d\n",
                       (int) i, (int) (first_inner_neuron + i), 0, (int) (first_output_neuron), (int) weight_idx);

            /* register the underived solution network under its multi-index
               — NOTE(review): 'initial_mi' is never defined in the visible text;
               presumably an all-zero MultiIndex(this->dim_i) — confirm */
            this->map_multiindices2nn[initial_mi] = this->solution;

            /* one summed network per differential equation */
            this->differential_equations = new std::vector<NeuralNetworkSum *>(0);
            this->differential_equations->reserve(this->n_equations);

            for (unsigned int i = 0; i < this->n_equations; ++i) {
                NeuralNetworkSum *new_sum = new NeuralNetworkSum();
                this->differential_equations->push_back(new_sum);
            }

            /* per-equation error-function type and data-set slots, filled later
               by set_error_function() */
            this->errors_functions_types = new std::vector<ErrorFunctionType>(this->n_equations);
            this->errors_functions_data_sets = new std::vector<DataSet *>(this->n_equations);

            printf("done\n");
    
            /*
             * NOTE(review): the lines below read as the body of
             * DESolver::~DESolver() — releasing all owned heap containers and the
             * cached derivative networks — but the destructor's signature (and the
             * constructor's closing brace above) were lost in this capture.
             * Code kept byte-identical; reconcile with the repository.
             */

            /* the solver owns the inner-neuron vector created in the constructor */
            if (this->solution_inner_neurons) {
                delete this->solution_inner_neurons;
                this->solution_inner_neurons = nullptr;
            }

            if (this->errors_functions_types) {
                delete this->errors_functions_types;
                this->errors_functions_types = nullptr;
            }

            /* only the vector is freed here — the DataSet objects are supplied by
               the caller via set_error_function() and are not owned */
            if (this->errors_functions_data_sets) {
                delete this->errors_functions_data_sets;
                this->errors_functions_data_sets = nullptr;
            }

            /* each per-equation NeuralNetworkSum is owned and deleted */
            if (this->differential_equations) {
                for (auto nns: *this->differential_equations) {
                    delete nns;
                }
                delete this->differential_equations;
                this->differential_equations = nullptr;

            /* NOTE(review): the closing '}' of the block above is missing in this
               capture */
            /* every cached derivative network (including the solution itself) */
            for (auto nn: this->map_multiindices2nn) {
                NeuralNetwork *n_to_delete = nn.second;
                delete n_to_delete;
            }
    
    // (VCS blame artifact from the web capture — author: Michal Kravcenko)
    //TODO more efficient representation of the functions (large portion of the structure is the same for all partial derivatives)
    
        /**
         * Adds the partial derivative described by the multi-index 'alpha'
         * (total order at most 2) as one summand of equation 'equation_idx',
         * weighted by the symbolic coefficient 'expression_string'. A network
         * evaluating that derivative is built once per distinct multi-index and
         * cached in map_multiindices2nn; all derivative networks share the
         * solution network's parameter space.
         *
         * @param equation_idx      index of the target equation (< n_equations)
         * @param alpha             multi-index of the requested partial derivative
         * @param expression_string symbolic coefficient of this term
         *
         * NOTE(review): this web capture dropped several lines from this method
         * (guard braces, an 'if' before the order-2 check, loop/branch closings,
         * and — presumably — an early 'return' after the cache-hit branch).
         * Code kept byte-identical; reconcile with the repository.
         */
        void DESolver::add_to_differential_equation(size_t equation_idx, MultiIndex &alpha, std::string expression_string) {

            /* NOTE(review): the guard's closing '}' is missing in this capture */
            if (equation_idx >= this->n_equations) {

                THROW_INVALID_ARGUMENT_ERROR("The provided equation index is too large!");

            /* total order of the requested partial derivative */
            size_t derivative_degree = alpha.get_degree();

                /* NOTE(review): the 'if (derivative_degree > 2)' that should wrap
                   this THROW is missing in this capture */
                THROW_INVALID_ARGUMENT_ERROR("The supplied multi-index represents partial derivative of order higher than 2! (Valid degree is at most 2)\n");

            /* retrieve indices of the variables according to which we perform the derivations ( applicable to any order, not just 2 or less )*/
            std::vector<size_t> partial_derivative_indices;
            partial_derivative_indices.reserve(derivative_degree);
            for (size_t i = 0; i < alpha.get_partial_derivatives_degrees()->size(); ++i) {
                size_t degree = alpha.get_partial_derivatives_degrees()->at(i);

                    /* NOTE(review): presumably inside 'while (degree > 0) { ... }'
                       — one entry per order of derivation w.r.t. variable i; the
                       loop header and its closings were lost in this capture */
                    partial_derivative_indices.push_back(i);
                    degree--;

            NeuralNetwork *new_net = nullptr;
            /* we check whether the new multi-index is already present */
            if (map_multiindices2nn.find(alpha) != map_multiindices2nn.end()) {
                new_net = map_multiindices2nn[alpha];
                this->differential_equations->at(equation_idx)->add_network(new_net, expression_string);

                printf("\nAdding an existing partial derivative (multi-index: %s) to equation %d with coefficient %s\n",
                       alpha.to_string().c_str(), (int) equation_idx, expression_string.c_str());

            /* NOTE(review): an early 'return;' and the branch's closing '}' are
               presumably missing above — otherwise a cached network would be
               rebuilt below */
            printf("\nAdding a new partial derivative (multi-index: %s) to equation %d with coefficient %s\n",
                   alpha.to_string().c_str(), (int) equation_idx, expression_string.c_str());


            /* we need to construct a new neural network */
            new_net = new NeuralNetwork();
            /* share weights/biases with the solution network */
            new_net->set_parameter_space_pointers(*this->solution);

            /* input neurons */
            std::vector<size_t> input_set(this->dim_i);
            size_t idx;
            for (size_t i = 0; i < this->dim_i; ++i) {
                NeuronLinear *input_i = new NeuronLinear();  //f(x) = x
                idx = new_net->add_neuron(input_i, BIAS_TYPE::NO_BIAS);
                input_set[i] = idx;
            }
            new_net->specify_input_neurons(input_set);
            size_t first_input_neuron = input_set[0];


            /* output neurons */
            std::vector<size_t> output_set(1);
            idx = new_net->add_neuron(new NeuronLinear(), BIAS_TYPE::NO_BIAS);//f(x) = x
            output_set[0] = idx;
            new_net->specify_output_neurons(output_set);
            size_t first_output_neuron = idx;

            /* the new partial derivative has degree of at least one */
            size_t first_inner_neuron = 0;
            NeuronLogistic *n_ptr = nullptr, *n_ptr2 = nullptr;
            for (size_t i = 0; i < this->dim_inn; ++i) {
                n_ptr = this->solution_inner_neurons->at(i);

                /* derive the logistic activation 'derivative_degree' times */
                for (size_t j = 0; j < derivative_degree; ++j) {
                    n_ptr2 = n_ptr;

                    n_ptr = n_ptr->get_derivative();

                /* NOTE(review): the inner loop's closing '}' (and presumably the
                   bookkeeping of first_inner_neuron / cleanup of n_ptr2) is
                   missing in this capture; the derived neuron reuses the bias of
                   inner neuron i from the solution network */
                idx = new_net->add_neuron(n_ptr, BIAS_TYPE::EXISTING_BIAS,
                                          this->solution->get_neuron_bias_index(i + this->dim_i + 1));

            /* identity neurons serving as a 'glue'*/
            size_t first_glue_neuron = idx + 1;
            for (size_t i = 0; i < derivative_degree * this->dim_inn; ++i) {
                idx = new_net->add_neuron(new NeuronLinear(), BIAS_TYPE::NO_BIAS); //f(x) = x

            /* connections between input neurons and inner neurons */
            /* NOTE(review): the previous loop's closing '}' is missing in this capture */
            size_t connection_idx = 0;
            for (size_t i = 0; i < this->dim_i; ++i) {
                for (size_t j = 0; j < this->dim_inn; ++j) {

                    printf("  adding a connection between input neuron %2d[%2d] and inner neuron  %2d[%2d], connection index: %3d\n",
                           (int) i, (int) (first_input_neuron + i), (int) j, (int) (first_inner_neuron + j),
                           (int) connection_idx);

                    /* reuse the input->inner weight with the same index from the
                       solution network */
                    new_net->add_existing_connection(first_input_neuron + i, first_inner_neuron + j, connection_idx,
                                                     *this->solution);
                    connection_idx++;
                }
            }

            printf("----------------------------------------------------------------------------------------------------\n");


            /* connections between inner neurons and the first set of 'glueing' neurons */
            for (size_t i = 0; i < this->dim_inn; ++i) {

                printf("  adding a connection between inner neuron %2d[%2d] and glue neuron   %2d[%2d], connection index: %3d\n",
                       (int) i, (int) (first_inner_neuron + i), (int) i, (int) (first_glue_neuron + i),
                       (int) connection_idx);

                new_net->add_existing_connection(first_inner_neuron + i, first_glue_neuron + i, connection_idx,
                                                 *this->solution);

    Michal Kravcenko's avatar
    Michal Kravcenko committed
                /* NOTE(review): the two lines above are VCS blame artifacts from
                   the web capture, not code; the loop's closing '}' after
                   connection_idx++ is also missing */
                connection_idx++;

            printf("----------------------------------------------------------------------------------------------------\n");


            size_t pd_idx;
            /* connections between glueing neurons */
            for (size_t di = 0; di < derivative_degree - 1; ++di) {
                pd_idx = partial_derivative_indices[di];/* partial derivative index */
                for (size_t i = 0; i < this->dim_inn; ++i) {
                    connection_idx = pd_idx * this->dim_inn + i;

                    printf("  adding a connection between glue neuron  %2d[%2d] and glue neuron   %2d[%2d], connection index: %3d\n",
                           (int) (i + (di) * this->dim_inn), (int) (first_glue_neuron + i + (di) * this->dim_inn),
                           (int) (i + (di + 1) * this->dim_inn), (int) (first_glue_neuron + i + (di + 1) * this->dim_inn),
                           (int) connection_idx);

                    new_net->add_existing_connection(first_glue_neuron + i + (di) * this->dim_inn,
                                                     first_glue_neuron + i + (di + 1) * this->dim_inn, connection_idx,
                                                     *this->solution);
                }
            }

            printf("----------------------------------------------------------------------------------------------------\n");

            /* connection between the layer of glueing neurons toward the output neuron */
            pd_idx = partial_derivative_indices[derivative_degree - 1];/* partial derivative index */
            for (size_t i = 0; i < this->dim_inn; ++i) {

    Michal Kravcenko's avatar
    Michal Kravcenko committed
                /* NOTE(review): the two lines above are VCS blame artifacts from
                   the web capture, not code */
                connection_idx = pd_idx * this->dim_inn + i;

                printf("  adding a connection between glue neuron %2d[%2d] and output neuron  %2d[%2d], connection index: %3d\n",
                       (int) (i + (derivative_degree - 1) * this->dim_inn),
                       (int) (first_glue_neuron + i + (derivative_degree - 1) * this->dim_inn), 0,
                       (int) (first_output_neuron), (int) connection_idx);

                new_net->add_existing_connection(first_glue_neuron + i + (derivative_degree - 1) * this->dim_inn,
                                                 first_output_neuron, connection_idx, *this->solution);

            /* NOTE(review): the loop's closing '}' — and presumably the insertion
               of new_net into map_multiindices2nn — is missing in this capture */
            this->differential_equations->at(equation_idx)->add_network(new_net, expression_string);
        }
    
        /**
         * Adds a known (analytic) term to equation 'equation_idx': a nullptr
         * network means the summand is given purely by 'expression_string'.
         *
         * @param equation_idx      index of the target equation
         * @param expression_string symbolic expression of the known function
         *
         * Fix: restores the method's closing brace lost in the capture.
         * NOTE(review): unlike the 3-arg overload, 'equation_idx' is not range
         * checked here; vector::at() below throws std::out_of_range instead.
         */
        void DESolver::add_to_differential_equation(size_t equation_idx, std::string expression_string) {

            printf("Adding a known function '%s' to equation %d\n", expression_string.c_str(), (int) equation_idx);

            this->differential_equations->at(equation_idx)->add_network(nullptr, expression_string);
        }
    
        /**
         * Assigns the error-function type and the data set (boundary/initial
         * conditions) used to train equation 'equation_idx'.
         *
         * @param equation_idx index of the equation (< n_equations)
         * @param F            error function type (currently only MSE is handled)
         * @param conditions   data set of conditions; NOT owned by the solver
         * @throws std::invalid_argument when equation_idx is out of range
         *
         * Fix: restores the guard's closing brace lost in the capture, so the
         * assignments no longer fall inside the error branch.
         */
        void DESolver::set_error_function(size_t equation_idx, ErrorFunctionType F, DataSet *conditions) {
            if (equation_idx >= this->n_equations) {
                THROW_INVALID_ARGUMENT_ERROR("The parameter 'equation_idx' is too large! It exceeds the number of differential equations.");
            }

            this->errors_functions_types->at(equation_idx) = F;
            this->errors_functions_data_sets->at(equation_idx) = conditions;
        }
    
    //TODO instead use general method with Optimizer as its argument (create hierarchy of optimizers)
    
        /**
         * Assembles the global error function (one summand per equation) and
         * runs the supplied learning method on it; the optimized parameters are
         * copied back into the solution network.
         *
         * @param learning_method optimizer to run on the summed error
         *
         * Fix: this capture had dropped the declarations of 'nn'/'ds' and the
         * else-branch around the nullptr summand; they are restored here,
         * mirroring the parallel loop in eval_total_error(). NOTE(review):
         * confirm against the repository that equations without a data set are
         * meant to contribute a nullptr error term.
         */
        void DESolver::solve(LearningMethod &learning_method) {

            /* DEFINITION OF THE GLOBAL ERROR FUNCTION */
            ErrorSum total_error;
            NeuralNetworkSum *nn;
            DataSet *ds;

            for (size_t i = 0; i < this->n_equations; ++i) {
                nn = this->differential_equations->at(i);
                ds = this->errors_functions_data_sets->at(i);

                if (ds) {
                    if (this->errors_functions_types->at(i) == ErrorFunctionType::ErrorFuncMSE) {
                        total_error.add_error_function(new MSE(nn, ds), 1.0);
                    } else {
                        //default
                        total_error.add_error_function(new MSE(nn, ds), 1.0);
                    }
                } else {
                    /* no conditions supplied for this equation */
                    total_error.add_error_function(nullptr, 1.0);
                }
            }

            printf("error before optimization: %f\n", total_error.eval(nullptr));

            learning_method.optimize(total_error);
            this->solution->copy_parameter_space(learning_method.get_parameters());

            printf("error after optimization: %f\n", total_error.eval(nullptr));
        }
    
        /**
         * Randomizes the weights and biases of the solution network; all
         * derivative networks share its parameter space and see the new values.
         *
         * Fix: restores the method's closing brace lost in the capture.
         * NOTE(review): MultiIndex(this->dim_i) is the all-zero multi-index,
         * presumably the key the solution network was registered under in the
         * constructor — confirm; map::operator[] would yield nullptr otherwise.
         */
        void DESolver::randomize_parameters() {

            MultiIndex alpha(this->dim_i);
            this->map_multiindices2nn[alpha]->randomize_parameters();
        }
    
        /**
         * Returns the cached network for the partial derivative described by
         * 'alpha' (the all-zero multi-index yields the solution itself).
         *
         * NOTE(review): std::map::operator[] default-inserts a nullptr entry
         * when 'alpha' was never registered — the caller receives nullptr and
         * the map grows; confirm this is intended.
         */
        NeuralNetwork *DESolver::get_solution(MultiIndex &alpha) {
            return this->map_multiindices2nn[alpha];
        }
    
        /**
         * Evaluates the left-hand side of equation 'equation_idx' at a single
         * point.
         *
         * @param equation_idx      index of the equation to evaluate
         * @param weight_and_biases parameter vector to evaluate with (nullptr
         *                          presumably means the current parameters —
         *                          confirm against NeuralNetworkSum::eval_single)
         * @param input             the point of the domain (dim_i values)
         * @return the single scalar output of the equation's network sum
         *
         * Fix: restores the 'return output[0];' and closing brace lost in the
         * capture; the commented-out debug prints were removed.
         */
        double
        DESolver::eval_equation(size_t equation_idx, std::vector<double> *weight_and_biases, std::vector<double> &input) {
            std::vector<double> output(1);

            this->differential_equations->at(equation_idx)->eval_single(input, output, weight_and_biases);

            return output[0];
        }
    
    
        double DESolver::eval_total_error(std::vector<double> &weights_and_biases) {
    
            ///* DEFINITION OF THE PARTIAL ERROR FUNCTIONS */
    
            std::vector<ErrorFunction *> error_functions(this->n_equations);
            for (size_t i = 0; i < this->n_equations; ++i) {
                nn = this->differential_equations->at(i);
                ds = this->errors_functions_data_sets->at(i);
    
                if (this->errors_functions_types->at(i) == ErrorFunctionType::ErrorFuncMSE) {
                    error_functions[i] = new MSE(nn, ds);
                } else {
                    //default
                    error_functions[i] = new MSE(nn, ds);
                }
    
    
            /* DEFINITION OF THE GLOBAL ERROR FUNCTION */
            ErrorSum total_error;
            for (size_t i = 0; i < this->n_equations; ++i) {
                total_error.add_error_function(error_functions[i], 1.0);
    
            //return total_error.eval(&weights_and_biases);
    		return 64;