Commit 5a528459 authored by Michal Kravcenko

ADD+MOD+FIX: added new evaluation functions for debug/testing purposes, modified random generation of parameters, fixed the calculation of gradient via backpropagation
parent d6969095
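As a reading aid before the diff itself, here is a minimal, hypothetical sketch of how the new debug evaluator might be driven. The umbrella header name, the FullyConnectedFFN constructor signature, and the NEURON_TYPE::LOGISTIC enumerator are assumptions inferred from fragments visible later in this commit, not a verified example.

#include <vector>
#include "4neuro.h" // assumed name of the library's umbrella header

int main() {
    // Hypothetical 2-3-1 fully connected network; constructor inferred
    // from the FullyConnectedFFN fragment further down in this diff.
    std::vector<unsigned int> layers = {2, 3, 1};
    lib4neuro::FullyConnectedFFN net(&layers, lib4neuro::NEURON_TYPE::LOGISTIC);

    // Both randomizers now draw from the shared, once-seeded member RNG.
    net.randomize_weights();
    net.randomize_biases();

    std::vector<double> input = {0.5, -0.25};
    std::vector<double> output(1);

    // Same result as eval_single(), plus a step-by-step trace on std::cout.
    net.eval_single_debug(input, output);
    return 0;
}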
NeuralNetwork.cpp
@@ -16,6 +16,7 @@
namespace lib4neuro {
NeuralNetwork::NeuralNetwork() {
this->gen = boost::random::mt19937(std::time(0));
this->neurons = new ::std::vector<Neuron *>(0);
this->neuron_biases = new ::std::vector<double>(0);
this->neuron_potentials = new ::std::vector<double>(0);
@@ -52,6 +53,7 @@ namespace lib4neuro {
THROW_RUNTIME_ERROR("File '" + filepath + "' couldn't be opened!");
}
this->gen = boost::random::mt19937(std::time(0));
}
NeuralNetwork::~NeuralNetwork() {
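The random generator becomes a member seeded once per network instance; the function-local generators removed from randomize_weights() and randomize_biases() below were each re-seeded with the second-resolution std::time(0), so back-to-back calls within the same second drew identical sequences. A standalone illustration of that pitfall (not lib4neuro code):

#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real_distribution.hpp>
#include <ctime>
#include <iostream>

int main() {
    // Two generators seeded with the same second-resolution timestamp...
    boost::random::mt19937 gen_a(std::time(0));
    boost::random::mt19937 gen_b(std::time(0));
    boost::random::uniform_real_distribution<> dist(-1.0, 1.0);
    // ...emit exactly the same stream, so both prints show one value.
    std::cout << dist(gen_a) << " == " << dist(gen_b) << std::endl;
    return 0;
}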
@@ -536,9 +538,82 @@ namespace lib4neuro {
}
}
void NeuralNetwork::eval_single_debug(::std::vector<double> &input, ::std::vector<double> &output,
::std::vector<double> *custom_weights_and_biases) {
if (this->input_neuron_indices->empty() || this->output_neuron_indices->empty()) {
THROW_INVALID_ARGUMENT_ERROR("Input and output neurons have not been specified!");
}
if (this->input_neuron_indices->size() != input.size()) {
THROW_INVALID_ARGUMENT_ERROR("Data input size != Network input size");
}
if (this->output_neuron_indices->size() != output.size()) {
THROW_INVALID_ARGUMENT_ERROR("Data output size != Network output size");
}
double potential, bias;
int bias_idx;
this->copy_parameter_space(custom_weights_and_biases);
this->analyze_layer_structure();
/* reset of the output and the neuron potentials */
::std::fill(output.begin(), output.end(), 0.0);
::std::fill(this->neuron_potentials->begin(), this->neuron_potentials->end(), 0.0);
/* set the potentials of the input neurons */
for (size_t i = 0; i < this->input_neuron_indices->size(); ++i) {
this->neuron_potentials->at(this->input_neuron_indices->at(i)) = input[i];
std::cout << this->neuron_potentials->at(this->input_neuron_indices->at(i)) << ", ";
}
std::cout << std::endl;
/* we iterate through all the feed-forward layers and transfer the signals */
for (auto layer: *this->neuron_layers_feedforward) {
/* we iterate through all neurons in this layer and propagate the signal to the neighboring neurons */
for (auto si: *layer) {
bias = 0.0;
bias_idx = this->neuron_bias_indices->at(si);
if (bias_idx >= 0) {
bias = this->neuron_biases->at(bias_idx);
}
potential = this->neurons->at(si)->activate(this->neuron_potentials->at(si), bias);
std::cout << " applying bias: " << bias << " to neuron potential: " << this->neuron_potentials->at(si) << " -> " << potential << std::endl;
for (auto c: *this->outward_adjacency->at(si)) {
size_t ti = c.first;
size_t ci = c.second;
this->neuron_potentials->at(ti) +=
this->connection_list->at(ci)->eval(*this->connection_weights) * potential;
std::cout << " adding input to neuron " << ti << " += " << this->connection_list->at(ci)->eval(*this->connection_weights) << "*" << potential << std::endl;
}
}
}
unsigned int i = 0;
for (auto oi: *this->output_neuron_indices) {
bias = 0.0;
bias_idx = this->neuron_bias_indices->at(oi);
if (bias_idx >= 0) {
bias = this->neuron_biases->at(bias_idx);
}
output[i] = this->neurons->at(oi)->activate(this->neuron_potentials->at(oi), bias);
std::cout << "setting the output[" << i << "] = " << output[i] << "(bias = " << bias << ")" << std::endl;
++i;
}
}
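In the notation of the trace messages above, the layer loop implements the usual feed-forward recurrence: each source neuron s turns its accumulated potential p_s and bias b_s into an activation, which is then distributed over its outgoing connections. Schematically (sigma_s is the neuron's activation function, w_{st} the connection weight returned by eval):

a_s = \sigma_s(p_s, b_s), \qquad p_t \mathrel{+}= w_{st} \, a_s \quad \text{for each outgoing connection } (s \to t)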
void NeuralNetwork::add_to_gradient_single(std::vector<double> &input, ::std::vector<double> &error_derivative,
double error_scaling, ::std::vector<double> &gradient) {
::std::vector<double> scaling_backprog(this->get_n_neurons());
::std::fill(scaling_backprog.begin(), scaling_backprog.end(), 0.0);
@@ -587,7 +662,7 @@
size_t ti = c.first;
size_t ci = c.second;
-neuron_potential_t = this->neuron_potentials->at(ti);
+neuron_potential_t = this->neurons->at(ti)->get_last_activation_value();
connection_weight = this->connection_list->at(ci)->eval(*this->connection_weights);
this->connection_list->at(ci)->eval_partial_derivative(*this->get_parameter_ptr_weights(),
@@ -605,28 +680,109 @@
}
}
-void NeuralNetwork::randomize_weights() {
-boost::random::mt19937 gen(std::time(0));
+void NeuralNetwork::add_to_gradient_single_debug(std::vector<double> &input, ::std::vector<double> &error_derivative,
+double error_scaling, ::std::vector<double> &gradient) {
+::std::vector<double> scaling_backprog(this->get_n_neurons());
+::std::fill(scaling_backprog.begin(), scaling_backprog.end(), 0.0);
size_t bias_shift = this->get_n_weights();
size_t neuron_idx;
int bias_idx;
double neuron_potential, neuron_activation_t, neuron_bias, connection_weight;
NeuronDifferentiable *active_neuron;
/* initial error propagation */
::std::vector<size_t> *current_layer = this->neuron_layers_feedforward->at(
this->neuron_layers_feedforward->size() - 1);
//TODO might not work in the future as the output neurons could be permuted
std::cout << "Error scaling on the output layer: ";
for (size_t i = 0; i < current_layer->size(); ++i) {
neuron_idx = current_layer->at(i);
scaling_backprog[neuron_idx] = error_derivative[i] * error_scaling;
std::cout << scaling_backprog[neuron_idx] << " [neuron " << neuron_idx << "], ";
}
std::cout << std::endl;
/* we iterate through all the layers in reverse order and calculate partial derivatives scaled correspondingly */
for (size_t j = this->neuron_layers_feedforward->size(); j > 0; --j) {
current_layer = this->neuron_layers_feedforward->at(j - 1);
for (size_t i = 0; i < current_layer->size(); ++i) {
neuron_idx = current_layer->at(i);
active_neuron = dynamic_cast<NeuronDifferentiable *> (this->neurons->at(neuron_idx));
if (active_neuron) {
std::cout << " [backpropagation] active neuron: " << neuron_idx << std::endl;
bias_idx = this->neuron_bias_indices->at(neuron_idx);
neuron_potential = this->neuron_potentials->at(neuron_idx);
if (bias_idx >= 0) {
neuron_bias = this->neuron_biases->at(bias_idx);
gradient[bias_shift + bias_idx] += scaling_backprog[neuron_idx] *
active_neuron->activation_function_eval_derivative_bias(
neuron_potential, neuron_bias);
scaling_backprog[neuron_idx] *= active_neuron->activation_function_eval_derivative(
neuron_potential,
neuron_bias);
}
std::cout << " [backpropagation] scaling coefficient: " << scaling_backprog[neuron_idx] << std::endl;
/* connections to lower level neurons */
for (auto c: *this->inward_adjacency->at(neuron_idx)) {
size_t ti = c.first;
size_t ci = c.second;
neuron_activation_t = this->neurons->at(ti)->get_last_activation_value( );
connection_weight = this->connection_list->at(ci)->eval(*this->connection_weights);
std::cout << " [backpropagation] value ("<<ti<< "): " << neuron_activation_t << ", scaling: " << scaling_backprog[neuron_idx] << std::endl;
this->connection_list->at(ci)->eval_partial_derivative(*this->get_parameter_ptr_weights(),
gradient,
neuron_activation_t *
scaling_backprog[neuron_idx]);
scaling_backprog[ti] += scaling_backprog[neuron_idx] * connection_weight;
}
} else {
THROW_INVALID_ARGUMENT_ERROR(
"Neuron used in backpropagation does not have a differentiable activation function!\n");
}
}
}
}
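For readers checking the trace, the reverse loop above is the standard backpropagation recurrence with scaling_backprog playing the role of the per-neuron adjoint delta, seeded on the output layer with error_derivative scaled by error_scaling. In code order, for each neuron s with potential p_s and bias b_s:

\frac{\partial E}{\partial b_s} \mathrel{+}= \delta_s \, \frac{\partial \sigma_s}{\partial b_s}(p_s, b_s), \qquad
\delta_s \leftarrow \delta_s \, \frac{\partial \sigma_s}{\partial p_s}(p_s, b_s), \qquad
\frac{\partial E}{\partial w_{ts}} \mathrel{+}= \delta_s \, a_t, \qquad
\delta_t \mathrel{+}= \delta_s \, w_{ts}

Note that the weight term uses the stored activation a_t = get_last_activation_value() rather than the raw potential of neuron t; the same one-line substitution in add_to_gradient_single above is the gradient fix announced in the commit message.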
void NeuralNetwork::randomize_weights() {
// Init weight guess ("optimal" for logistic activation functions)
-double r = 4 * sqrt(6. / (this->connection_weights->size()));
+double r = 1.0 / (this->neuron_biases->size() + this->connection_weights->size());
boost::random::uniform_real_distribution<> dist(-r, r);
for (size_t i = 0; i < this->connection_weights->size(); i++) {
this->connection_weights->at(i) = dist(gen);
// std::cout << "weight[" << i <<"]" << this->connection_weights->at(i) << std::endl;
}
}
void NeuralNetwork::randomize_biases() {
-boost::random::mt19937 gen(std::time(0));
+double r = 1.0 / (this->neuron_biases->size() + this->connection_weights->size());
// Init weight guess ("optimal" for logistic activation functions)
-boost::random::uniform_real_distribution<> dist(-1, 1);
+boost::random::uniform_real_distribution<> dist(-r, r);
for (size_t i = 0; i < this->neuron_biases->size(); i++) {
this->neuron_biases->at(i) = dist(gen);
// std::cout << "bias[" << i <<"]" << this->neuron_biases->at(i) << std::endl;
}
}
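Both randomizers now draw every parameter from the same symmetric interval whose half-width shrinks with the total parameter count, replacing the previous 4\sqrt{6/n_w} heuristic for weights and the fixed (-1, 1) range for biases:

w_i, \, b_j \sim \mathcal{U}(-r, r), \qquad r = \frac{1}{n_{\mathrm{biases}} + n_{\mathrm{weights}}}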
@@ -1022,6 +1178,7 @@ namespace lib4neuro {
NEURON_TYPE hidden_layer_neuron_type,
std::ofstream* ofs) : NeuralNetwork() {
std::vector<NEURON_TYPE> tmp;
this->gen = boost::random::mt19937(std::time(0));
for (size_t i = 0; i < neuron_numbers->size(); i++) {
tmp.emplace_back(hidden_layer_neuron_type);
@@ -1043,6 +1200,7 @@ namespace lib4neuro {
THROW_INVALID_ARGUMENT_ERROR("Parameter 'neuron_numbers' specifying numbers of neurons in network's layers "
"doesn't specify input and output layers, which are compulsory!");
}
this->gen = boost::random::mt19937(std::time(0));
this->neurons = new ::std::vector<Neuron *>(0);
this->neuron_biases = new ::std::vector<double>(0);
NeuralNetwork.h
@@ -13,7 +13,13 @@
-#include <iostream>
-#include <vector>
+#include <iostream>
+#include <cstdio>
+#include <fstream>
+#include <vector>
+#include <utility>
+#include <algorithm>
#include <assert.h>
-#include <algorithm>
-#include <utility>
-#include <fstream>
@@ -28,6 +34,10 @@
#include "../NetConnection/ConnectionFunctionIdentity.h"
#include "../NormalizationStrategy/NormalizationStrategy.h"
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include <boost/random/uniform_real_distribution.hpp>
namespace lib4neuro {
/**
@@ -50,6 +60,7 @@ namespace lib4neuro {
*/
class NeuralNetwork {
protected:
boost::random::mt19937 gen;
/**
*
@@ -216,6 +227,15 @@
LIB4NEURO_API virtual void eval_single(std::vector<double> &input, std::vector<double> &output,
std::vector<double> *custom_weights_and_biases = nullptr);
/**
* Evaluates the network on a single input and prints a step-by-step trace to std::cout (debugging variant of eval_single)
* @param input
* @param output
* @param custom_weights_and_biases
*/
LIB4NEURO_API virtual void eval_single_debug(std::vector<double> &input, std::vector<double> &output,
std::vector<double> *custom_weights_and_biases = nullptr);
/**
*
* @param error_derivative
@@ -225,6 +245,15 @@
add_to_gradient_single(std::vector<double> &input, std::vector<double> &error_derivative, double error_scaling,
std::vector<double> &gradient);
/**
* Adds a single input's contribution to the gradient and prints a backpropagation trace to std::cout (debugging variant of add_to_gradient_single)
* @param input
* @param error_derivative
* @param error_scaling
* @param gradient
*/
LIB4NEURO_API virtual void
add_to_gradient_single_debug(std::vector<double> &input, std::vector<double> &error_derivative, double error_scaling,
std::vector<double> &gradient);
/**
* Adds a new neuron to the list of neurons. Also assigns a valid bias value to its activation function
* @param[in] n
NeuralNetworkSum.cpp
@@ -78,6 +78,36 @@ namespace lib4neuro {
}
void NeuralNetworkSum::eval_single_debug(std::vector<double> &input, std::vector<double> &output,
std::vector<double> *custom_weights_and_biases) {
std::vector<double> mem_output(output.size());
std::fill(output.begin(), output.end(), 0.0);
NeuralNetwork *SUM;
for (size_t ni = 0; ni < this->summand->size(); ++ni) {
SUM = this->summand->at(ni);
if (SUM) {
this->summand->at(ni)->eval_single_debug(input, mem_output, custom_weights_and_biases);
double alpha = this->summand_coefficient->at(ni)->eval(input);
for (size_t j = 0; j < output.size(); ++j) {
output[j] += mem_output[j] * alpha;
}
} else {
//TODO assume the result can be a vector of doubles
double alpha = this->summand_coefficient->at(ni)->eval(input);
for (size_t j = 0; j < output.size(); ++j) {
output[j] += alpha;
}
}
}
}
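The debug variant mirrors eval_single(): every non-null summand network is evaluated through its own eval_single_debug() and blended with its input-dependent coefficient, while a null summand contributes its coefficient directly (treated as a scalar, per the TODO). As a formula:

y_j(x) = \sum_i \alpha_i(x) \, y_j^{(i)}(x), \qquad \text{with } y_j^{(i)}(x) \equiv 1 \text{ for a null summand}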
void NeuralNetworkSum::add_to_gradient_single(std::vector<double> &input, std::vector<double> &error_derivative,
double error_scaling, std::vector<double> &gradient) {
NeuralNetworkSum.h
@@ -43,6 +43,15 @@ namespace lib4neuro {
LIB4NEURO_API void eval_single(std::vector<double> &input, std::vector<double> &output,
std::vector<double> *custom_weights_and_biases = nullptr) override;
/**
* Debugging variant of eval_single; evaluates every summand network via its own eval_single_debug
* @param input
* @param output
* @param custom_weights_and_biases
*/
LIB4NEURO_API void eval_single_debug(std::vector<double> &input, std::vector<double> &output,
std::vector<double> *custom_weights_and_biases = nullptr) override;
/**
*
* @param error_derivative