From 5a52845927b05c25e9e193d74bcbfede1efa76bb Mon Sep 17 00:00:00 2001
From: Michal Kravcenko <michal.kravcenko@vsb.cz>
Date: Wed, 20 Feb 2019 14:02:39 +0100
Subject: [PATCH] ADD+MOD+FIX: added new evaluation functions for
 debugging/testing purposes, modified the random generation of network
 parameters, and fixed the gradient calculation via backpropagation

---
 src/Network/NeuralNetwork.cpp    | 170 +++++++++++++++++++++++++++++--
 src/Network/NeuralNetwork.h      |  31 +++++-
 src/Network/NeuralNetworkSum.cpp |  30 ++++++
 src/Network/NeuralNetworkSum.h   |   9 ++
 4 files changed, 233 insertions(+), 7 deletions(-)

diff --git a/src/Network/NeuralNetwork.cpp b/src/Network/NeuralNetwork.cpp
index ed0f9ad5..e566efad 100644
--- a/src/Network/NeuralNetwork.cpp
+++ b/src/Network/NeuralNetwork.cpp
@@ -16,6 +16,7 @@ namespace lib4neuro {
 
     NeuralNetwork::NeuralNetwork() {
+        this->gen = boost::random::mt19937(std::time(0));
         this->neurons = new ::std::vector<Neuron *>(0);
         this->neuron_biases = new ::std::vector<double>(0);
         this->neuron_potentials = new ::std::vector<double>(0);
@@ -52,6 +53,7 @@ namespace lib4neuro {
             THROW_RUNTIME_ERROR("File '" + filepath + "' couldn't be open!");
         }
 
+        this->gen = boost::random::mt19937(std::time(0));
     }
 
     NeuralNetwork::~NeuralNetwork() {
@@ -536,9 +538,82 @@ namespace lib4neuro {
         }
     }
 
+    void NeuralNetwork::eval_single_debug(::std::vector<double> &input, ::std::vector<double> &output,
+                                          ::std::vector<double> *custom_weights_and_biases) {
+        if ((this->input_neuron_indices->size() * this->output_neuron_indices->size()) <= 0) {
+            THROW_INVALID_ARGUMENT_ERROR("Input and output neurons have not been specified!");
+        }
+
+        if (this->input_neuron_indices->size() != input.size()) {
+            THROW_INVALID_ARGUMENT_ERROR("Data input size != Network input size");
+        }
+
+        if (this->output_neuron_indices->size() != output.size()) {
+            THROW_INVALID_ARGUMENT_ERROR("Data output size != Network output size");
+        }
+
+        double potential, bias;
+        int bias_idx;
+
+        this->copy_parameter_space(custom_weights_and_biases);
+
+        this->analyze_layer_structure();
+
+        /* reset of the output and the neuron potentials */
+        ::std::fill(output.begin(), output.end(), 0.0);
+        ::std::fill(this->neuron_potentials->begin(), this->neuron_potentials->end(), 0.0);
+
+        /* set the potentials of the input neurons */
+        for (size_t i = 0; i < this->input_neuron_indices->size(); ++i) {
+            this->neuron_potentials->at(this->input_neuron_indices->at(i)) = input[i];
+            std::cout << this->neuron_potentials->at(this->input_neuron_indices->at(i)) << ", ";
+        }
+        std::cout << std::endl;
+
+        /* we iterate through all the feed-forward layers and transfer the signals */
+        for (auto layer: *this->neuron_layers_feedforward) {
+            /* we iterate through all neurons in this layer and propagate the signal to the neighboring neurons */
+
+            for (auto si: *layer) {
+                bias = 0.0;
+                bias_idx = this->neuron_bias_indices->at(si);
+                if (bias_idx >= 0) {
+                    bias = this->neuron_biases->at(bias_idx);
+                }
+                potential = this->neurons->at(si)->activate(this->neuron_potentials->at(si), bias);
+                std::cout << " applying bias: " << bias << " to neuron potential: " << this->neuron_potentials->at(si) << " -> " << potential << std::endl;
+
+                for (auto c: *this->outward_adjacency->at(si)) {
+                    size_t ti = c.first;
+                    size_t ci = c.second;
+
+                    this->neuron_potentials->at(ti) +=
+                            this->connection_list->at(ci)->eval(*this->connection_weights) * potential;
+
+                    std::cout << " adding input to neuron " << ti << " += " << this->connection_list->at(ci)->eval(*this->connection_weights) << "*" << potential << std::endl;
+                }
+            }
+        }
+
+        unsigned int i = 0;
+        for (auto oi: *this->output_neuron_indices) {
+            bias = 0.0;
+            bias_idx = this->neuron_bias_indices->at(oi);
+            if (bias_idx >= 0) {
+                bias = this->neuron_biases->at(bias_idx);
+            }
+            output[i] = this->neurons->at(oi)->activate(this->neuron_potentials->at(oi), bias);
+            std::cout << "setting the output[" << i << "] = " << output[i] << " (bias = " << bias << ")" << std::endl;
+            ++i;
+        }
+    }
+
     void NeuralNetwork::add_to_gradient_single(std::vector<double> &input, ::std::vector<double> &error_derivative,
                                                double error_scaling, ::std::vector<double> &gradient) {
+
         ::std::vector<double> scaling_backprog(this->get_n_neurons());
         ::std::fill(scaling_backprog.begin(), scaling_backprog.end(), 0.0);
@@ -587,7 +662,7 @@ namespace lib4neuro {
                 size_t ti = c.first;
                 size_t ci = c.second;
 
-                neuron_potential_t = this->neuron_potentials->at(ti);
+                neuron_potential_t = this->neurons->at(ti)->get_last_activation_value();
                 connection_weight = this->connection_list->at(ci)->eval(*this->connection_weights);
 
                 this->connection_list->at(ci)->eval_partial_derivative(*this->get_parameter_ptr_weights(),
@@ -605,28 +680,109 @@ namespace lib4neuro {
         }
     }
 
-    void NeuralNetwork::randomize_weights() {
+    void NeuralNetwork::add_to_gradient_single_debug(std::vector<double> &input, ::std::vector<double> &error_derivative,
+                                                     double error_scaling, ::std::vector<double> &gradient) {
+
-        boost::random::mt19937 gen(std::time(0));
+        ::std::vector<double> scaling_backprog(this->get_n_neurons());
+        ::std::fill(scaling_backprog.begin(), scaling_backprog.end(), 0.0);
+
+        size_t bias_shift = this->get_n_weights();
+        size_t neuron_idx;
+        int bias_idx;
+        double neuron_potential, neuron_activation_t, neuron_bias, connection_weight;
+
+        NeuronDifferentiable *active_neuron;
+
+        /* initial error propagation */
+        ::std::vector<size_t> *current_layer = this->neuron_layers_feedforward->at(
+                this->neuron_layers_feedforward->size() - 1);
+        //TODO might not work in the future as the output neurons could be permuted
+        std::cout << "Error scaling on the output layer: ";
+        for (size_t i = 0; i < current_layer->size(); ++i) {
+            neuron_idx = current_layer->at(i);
+            scaling_backprog[neuron_idx] = error_derivative[i] * error_scaling;
+
+            std::cout << scaling_backprog[neuron_idx] << " [neuron " << neuron_idx << "], ";
+        }
+        std::cout << std::endl;
+
+        /* we iterate through all the layers in reverse order and calculate partial derivatives scaled correspondingly */
+        for (size_t j = this->neuron_layers_feedforward->size(); j > 0; --j) {
+
+            current_layer = this->neuron_layers_feedforward->at(j - 1);
+
+            for (size_t i = 0; i < current_layer->size(); ++i) {
+
+                neuron_idx = current_layer->at(i);
+                active_neuron = dynamic_cast<NeuronDifferentiable *> (this->neurons->at(neuron_idx));
+
+                if (active_neuron) {
+                    std::cout << " [backpropagation] active neuron: " << neuron_idx << std::endl;
+
+                    bias_idx = this->neuron_bias_indices->at(neuron_idx);
+                    neuron_potential = this->neuron_potentials->at(neuron_idx);
+
+                    if (bias_idx >= 0) {
+                        neuron_bias = this->neuron_biases->at(bias_idx);
+                        gradient[bias_shift + bias_idx] += scaling_backprog[neuron_idx] *
+                                                           active_neuron->activation_function_eval_derivative_bias(
+                                                                   neuron_potential, neuron_bias);
+                        scaling_backprog[neuron_idx] *= active_neuron->activation_function_eval_derivative(
+                                neuron_potential,
+                                neuron_bias);
+                    }
+
+                    std::cout << " [backpropagation] scaling coefficient: " << scaling_backprog[neuron_idx] << std::endl;
+
+                    /* connections to lower level neurons */
+                    for (auto c: *this->inward_adjacency->at(neuron_idx)) {
+                        size_t ti = c.first;
+                        size_t ci = c.second;
+
+                        neuron_activation_t = this->neurons->at(ti)->get_last_activation_value();
+                        connection_weight = this->connection_list->at(ci)->eval(*this->connection_weights);
+
+                        std::cout << " [backpropagation] value (" << ti << "): " << neuron_activation_t << ", scaling: " << scaling_backprog[neuron_idx] << std::endl;
+
+                        this->connection_list->at(ci)->eval_partial_derivative(*this->get_parameter_ptr_weights(),
+                                                                               gradient,
+                                                                               neuron_activation_t *
+                                                                               scaling_backprog[neuron_idx]);
+
+                        scaling_backprog[ti] += scaling_backprog[neuron_idx] * connection_weight;
+                    }
+                } else {
+                    THROW_INVALID_ARGUMENT_ERROR(
+                            "Neuron used in backpropagation does not contain a differentiable activation function!\n");
+                }
+            }
+        }
+    }
+
+    void NeuralNetwork::randomize_weights() {
 
         // Init weight guess ("optimal" for logistic activation functions)
-        double r = 4 * sqrt(6. / (this->connection_weights->size()));
+        double r = 1.0 / (this->neuron_biases->size() + this->connection_weights->size());
 
         boost::random::uniform_real_distribution<> dist(-r, r);
 
         for (size_t i = 0; i < this->connection_weights->size(); i++) {
             this->connection_weights->at(i) = dist(gen);
+//            std::cout << "weight[" << i << "] " << this->connection_weights->at(i) << std::endl;
         }
     }
 
     void NeuralNetwork::randomize_biases() {
 
-        boost::random::mt19937 gen(std::time(0));
+
+        double r = 1.0 / (this->neuron_biases->size() + this->connection_weights->size());
 
         // Init weight guess ("optimal" for logistic activation functions)
-        boost::random::uniform_real_distribution<> dist(-1, 1);
+        boost::random::uniform_real_distribution<> dist(-r, r);
+
         for (size_t i = 0; i < this->neuron_biases->size(); i++) {
             this->neuron_biases->at(i) = dist(gen);
+//            std::cout << "bias[" << i << "] " << this->neuron_biases->at(i) << std::endl;
         }
     }
@@ -1022,6 +1178,7 @@ namespace lib4neuro {
                                            NEURON_TYPE hidden_layer_neuron_type,
                                            std::ofstream* ofs) : NeuralNetwork() {
         std::vector<NEURON_TYPE> tmp;
+        this->gen = boost::random::mt19937(std::time(0));
 
         for(auto i = 0; i < neuron_numbers->size(); i++) {
             tmp.emplace_back(hidden_layer_neuron_type);
@@ -1043,6 +1200,7 @@ namespace lib4neuro {
             THROW_INVALID_ARGUMENT_ERROR("Parameter 'neuron_numbers' specifying numbers of neurons in network's layers "
                                          "doesn't specify input and output layers, which are compulsory!");
         }
+        this->gen = boost::random::mt19937(std::time(0));
 
         this->neurons = new ::std::vector<Neuron *>(0);
         this->neuron_biases = new ::std::vector<double>(0);
diff --git a/src/Network/NeuralNetwork.h b/src/Network/NeuralNetwork.h
index 08db2531..2e6218ab 100644
--- a/src/Network/NeuralNetwork.h
+++ b/src/Network/NeuralNetwork.h
@@ -13,7 +13,13 @@
 #include <iostream>
 #include <vector>
-
+#include <iostream>
+#include <cstdio>
+#include <fstream>
+#include <vector>
+#include <utility>
+#include <algorithm>
+#include <assert.h>
 #include <algorithm>
 #include <utility>
 #include <fstream>
@@ -28,6 +34,10 @@
 #include "../NetConnection/ConnectionFunctionIdentity.h"
 #include "../NormalizationStrategy/NormalizationStrategy.h"
 
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int_distribution.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+
 namespace lib4neuro {
 
     /**
@@ -50,6 +60,7 @@ namespace lib4neuro {
      */
     class NeuralNetwork {
     protected:
+        boost::random::mt19937 gen;
 
         /**
          *
@@ -216,6 +227,15 @@ namespace lib4neuro {
         LIB4NEURO_API virtual void eval_single(std::vector<double> &input, std::vector<double> &output,
                                                std::vector<double> *custom_weights_and_biases = nullptr);
 
+        /**
+         *
+         * @param input
+         * @param output
+         * @param custom_weights_and_biases
+         */
+        LIB4NEURO_API virtual void eval_single_debug(std::vector<double> &input, std::vector<double> &output,
+                                                     std::vector<double> *custom_weights_and_biases = nullptr);
+
         /**
          *
          * @param error_derivative
@@ -225,6 +245,15 @@ namespace lib4neuro {
         add_to_gradient_single(std::vector<double> &input, std::vector<double> &error_derivative, double error_scaling,
                                std::vector<double> &gradient);
 
+        /**
+         *
+         * @param error_derivative
+         * @param gradient
+         */
+        LIB4NEURO_API virtual void
+        add_to_gradient_single_debug(std::vector<double> &input, std::vector<double> &error_derivative, double error_scaling,
+                                     std::vector<double> &gradient);
+
         /**
          * Adds a new neuron to the list of neurons. Also assigns a valid bias value to its activation function
          * @param[in] n
diff --git a/src/Network/NeuralNetworkSum.cpp b/src/Network/NeuralNetworkSum.cpp
index 8dbd3560..7763b63d 100644
--- a/src/Network/NeuralNetworkSum.cpp
+++ b/src/Network/NeuralNetworkSum.cpp
@@ -78,6 +78,36 @@ namespace lib4neuro {
 
     }
 
+    void NeuralNetworkSum::eval_single_debug(std::vector<double> &input, std::vector<double> &output,
+                                             std::vector<double> *custom_weights_and_biases) {
+        std::vector<double> mem_output(output.size());
+        std::fill(output.begin(), output.end(), 0.0);
+
+        NeuralNetwork *SUM;
+
+        for (size_t ni = 0; ni < this->summand->size(); ++ni) {
+            SUM = this->summand->at(ni);
+
+            if (SUM) {
+                this->summand->at(ni)->eval_single_debug(input, mem_output, custom_weights_and_biases);
+
+                double alpha = this->summand_coefficient->at(ni)->eval(input);
+
+                for (size_t j = 0; j < output.size(); ++j) {
+                    output[j] += mem_output[j] * alpha;
+                }
+            } else {
+                //TODO assume the result can be a vector of doubles
+                double alpha = this->summand_coefficient->at(ni)->eval(input);
+
+                for (size_t j = 0; j < output.size(); ++j) {
+                    output[j] += alpha;
+                }
+            }
+        }
+
+    }
+
     void NeuralNetworkSum::add_to_gradient_single(std::vector<double> &input, std::vector<double> &error_derivative,
                                                   double error_scaling, std::vector<double> &gradient) {
diff --git a/src/Network/NeuralNetworkSum.h b/src/Network/NeuralNetworkSum.h
index 9eb2a601..42127a26 100644
--- a/src/Network/NeuralNetworkSum.h
+++ b/src/Network/NeuralNetworkSum.h
@@ -43,6 +43,15 @@ namespace lib4neuro {
         LIB4NEURO_API void eval_single(std::vector<double> &input, std::vector<double> &output,
                                        std::vector<double> *custom_weights_and_biases = nullptr) override;
 
+        /**
+         *
+         * @param input
+         * @param output
+         * @param custom_weights_and_biases
+         */
+        LIB4NEURO_API void eval_single_debug(std::vector<double> &input, std::vector<double> &output,
+                                             std::vector<double> *custom_weights_and_biases = nullptr) override;
+
         /**
          *
          * @param error_derivative
-- 
GitLab
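
Reviewer's note (not part of the patch): the sketch below shows how the new debug entry points might be exercised together with the reseeded randomization. It is a minimal example under stated assumptions, not a definitive usage: the `4neuro.h` umbrella header, a default `nullptr` for the `std::ofstream*` constructor argument, and the `get_n_biases()` accessor are assumptions; the `FullyConnectedFFN` constructor shape and `NEURON_TYPE` parameter are visible in the diff context above.

```cpp
// Hypothetical usage sketch -- NOT part of this commit. Assumed names:
// the "4neuro.h" umbrella header, FullyConnectedFFN(layer_sizes*,
// NEURON_TYPE) with a defaulted ofstream* argument, and get_n_biases().
#include <iostream>
#include <vector>
#include "4neuro.h"

int main() {
    // A small 2-3-1 fully connected network with logistic hidden units.
    std::vector<unsigned int> layer_sizes = {2, 3, 1};
    lib4neuro::FullyConnectedFFN net(&layer_sizes, lib4neuro::NEURON_TYPE::LOGISTIC);

    // The constructors now seed the member RNG 'gen', so both calls below
    // draw from one shared generator instead of re-seeding with time(0)
    // on every call (which previously could repeat identical sequences).
    net.randomize_weights();
    net.randomize_biases();

    std::vector<double> input = {0.5, -0.25};
    std::vector<double> output(1);

    // Verbose forward pass: prints the input potentials, each neuron's
    // bias and activation, and the final network outputs.
    net.eval_single_debug(input, output);

    // Verbose backward pass for one sample with target 1.0, assuming a
    // squared-error-style derivative (output - target).
    std::vector<double> error_derivative = {output[0] - 1.0};
    std::vector<double> gradient(net.get_n_weights() + net.get_n_biases(), 0.0);
    net.add_to_gradient_single_debug(input, error_derivative, 1.0, gradient);

    for (size_t i = 0; i < gradient.size(); ++i) {
        std::cout << "gradient[" << i << "] = " << gradient[i] << std::endl;
    }
    return 0;
}
```

On the initialization change itself: the sampling radius moves from `4 * sqrt(6. / n_w)` to `r = 1.0 / (n_b + n_w)`, so the admissible range now shrinks as the parameter count grows, which should keep logistic units out of their saturated region in wider networks. The printed gradient from the debug backward pass can be cross-checked against a central finite difference, `(E(w + h) - E(w - h)) / (2h)`, on a few individual parameters to confirm the backpropagation fix (the switch from raw potentials to `get_last_activation_value()` for the source neuron of each connection).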