diff --git a/src/LearningMethods/ParticleSwarm.cpp b/src/LearningMethods/ParticleSwarm.cpp
index 62cedd36f640e835697056775091a80c5cb65637..5370623884c606fee265e4b955d5aeb93e7df019 100644
--- a/src/LearningMethods/ParticleSwarm.cpp
+++ b/src/LearningMethods/ParticleSwarm.cpp
@@ -5,6 +5,7 @@
  * @date 2.7.18 -
  */

+#include <iostream>
 #include "ParticleSwarm.h"

 /**
@@ -14,41 +15,62 @@
  * @param domain_bounds
  * @param F
  */
-Particle::Particle(ErrorFunction* ef, double *domain_bounds) {
-    //TODO better generating of random numbers
-    this->coordinate_dim = ef->get_dimension();
-    this->coordinate = new std::vector<double>(this->coordinate_dim);
-    this->velocity = new std::vector<double>(this->coordinate_dim);
-    this->optimal_coordinate = new std::vector<double>(this->coordinate_dim);
+void Particle::randomize_coordinates() {

     std::random_device seeder;
     std::mt19937 gen(seeder());
-    std::uniform_real_distribution<double> dist_vel(0.5, 1.0);
+    std::uniform_real_distribution<double> dist_coord(-1.0, 1.0);
     for(unsigned int i = 0; i < this->coordinate_dim; ++i){
-        (*this->velocity)[i] = dist_vel(gen);
+        (*this->coordinate)[i] = dist_coord(gen);
     }
+}
+
+void Particle::randomize_parameters() {
+
+    std::random_device seeder;
+    std::mt19937 gen(seeder());
+    std::uniform_real_distribution<double> dist_vel(0.5, 1.0);

     this->r1 = dist_vel(gen);
     this->r2 = dist_vel(gen);
     this->r3 = dist_vel(gen);
+}

+void Particle::randomize_velocity() {
+    std::random_device seeder;
+    std::mt19937 gen(seeder());
+    std::uniform_real_distribution<double> dist_vel(0.5, 1.0);
+    for(unsigned int i = 0; i < this->coordinate_dim; ++i){
+        (*this->velocity)[i] = dist_vel(gen);
+    }
+}

-
-    this->ef = ef;
-    std::uniform_real_distribution<double> dist_coord(-1.0, 1.0);
+Particle::Particle(ErrorFunction* ef, double *domain_bounds) {
+    //TODO better generating of random numbers
     this->domain_bounds = domain_bounds;
+    this->coordinate_dim = ef->get_dimension();
+    this->ef = ef;
+
+    this->coordinate = new std::vector<double>(this->coordinate_dim);
+    this->velocity = new std::vector<double>(this->coordinate_dim);
+    this->optimal_coordinate = new std::vector<double>(this->coordinate_dim);
+
+    this->randomize_velocity();
+    this->randomize_parameters();
+    this->randomize_coordinates();
+
     for(unsigned int i = 0; i < this->coordinate_dim; ++i){
-        (*this->coordinate)[i] = dist_coord(gen);
         (*this->optimal_coordinate)[i] = (*this->coordinate)[i];
     }
-//    (*this->coordinate) = {-6.35706416, -1.55399893, -1.05305639, 3.07464411};
-//    printf("coordinate_dim: %d\n", this->coordinate_dim);

     this->optimal_value = this->ef->eval(this->coordinate);

-//    this->print_coordinate();
+    this->print_coordinate();
+
 }

 Particle::~Particle() {
@@ -94,8 +117,6 @@ double Particle::change_coordinate(double w, double c1, double c2, std::vector<d
     double vel_mem;
     double output = 0.0;
-    bool in_domain;
-    double compensation_coef = 1;

     /* Choose random global minima */
     std::vector<double> *random_global_best;
@@ -107,27 +128,31 @@ double Particle::change_coordinate(double w, double c1, double c2, std::vector<d
     // TODO use std::sample to choose random vector
     //std::sample(global_min_vec.begin(), global_min_vec.end(), std::back_inserter(random_global_best), 1, std::mt19937{std::random_device{}()});

+    for(unsigned int i = 0; i < this->coordinate_dim; ++i) {
+        vel_mem = w * (*this->velocity)[i]
+                  + c1 * this->r1 * ((*this->optimal_coordinate)[i] - (*this->coordinate)[i])
+                  + c2 * this->r2 * (glob_min_coord[i] - (*this->coordinate)[i]);
+//                + (c1+c2)/2 * this->r3 * ((*random_global_best)[i] - (*this->coordinate)[i]);
+
+        if ((*this->coordinate)[i] + vel_mem > this->domain_bounds[2 * i + 1]) {
+            this->randomize_velocity();
+            this->randomize_parameters();
+            this->randomize_coordinates();
+            break;
+        } else if ((*this->coordinate)[i] + vel_mem < this->domain_bounds[2 * i]) {
+            this->randomize_velocity();
+            this->randomize_parameters();
+            this->randomize_coordinates();
+            break;
+        }
+    }
+
     for(unsigned int i = 0; i < this->coordinate_dim; ++i){
         vel_mem = w * (*this->velocity)[i]
                   + c1 * this->r1 * ((*this->optimal_coordinate)[i] - (*this->coordinate)[i])
                   + c2 * this->r2 * (glob_min_coord[i] - (*this->coordinate)[i])
                   + (c1+c2)/2 * this->r3 * ((*random_global_best)[i] - (*this->coordinate)[i]);

-        do{
-//            printf("%f + %f ?? %f, %f\n", (*this->coordinate)[i], vel_mem, this->domain_bounds[2 * i + 1], this->domain_bounds[2 * i]);
-            if ((*this->coordinate)[i] + vel_mem > this->domain_bounds[2 * i + 1]) {
-                in_domain = false;
-                vel_mem = -penalty_coef * compensation_coef * w * vel_mem;
-                compensation_coef /= 2;
-            } else if ((*this->coordinate)[i] + vel_mem < this->domain_bounds[2 * i]) {
-                in_domain = false;
-                vel_mem = penalty_coef * compensation_coef * w * vel_mem;
-                compensation_coef /= 2;
-            } else {
-                in_domain = true;
-                compensation_coef = 1;
-            }
-        }while(!in_domain);

         (*this->velocity)[i] = vel_mem;
         (*this->coordinate)[i] += vel_mem;
@@ -275,6 +300,7 @@ void ParticleSwarm::optimize( double gamma, double epsilon, double delta) {
         for(size_t pi=0; pi < this->n_particles; pi++) {
             particle = this->particle_swarm[pi];
             tmp_velocity = particle->change_coordinate( this->w, this->c1, this->c2, *this->p_min_glob, global_best_vec);
+            particle->print_coordinate();

             if(tmp_velocity > max_velocity) {
                 prev_max_velocity = max_velocity;
@@ -301,6 +327,7 @@ void ParticleSwarm::optimize( double gamma, double epsilon, double delta) {
         euclidean_dist /= this->n_particles;
         printf("Iteration %d, avg euclidean distance: %f, cluster percent: %f, f-value: %f\n", (int)outer_it, euclidean_dist, double(cluster.size())/this->n_particles, optimal_value);
+        std::cout.flush();

//        for(unsigned int i=0; i < this->n_particles; i++) {
//            printf("Particle %d (%f): \n", i, this->particle_swarm[i]->get_current_value());
@@ -322,14 +349,14 @@ void ParticleSwarm::optimize( double gamma, double epsilon, double delta) {
     if(outer_it < this->iter_max) {
         /* Convergence reached */
-        printf("Found optimum in %d iterations: %10.8f at coordinates: \n", (int)outer_it, optimal_value);
+        printf("\nFound optimum in %d iterations: %10.8f at coordinates: \n", (int)outer_it, optimal_value);
         for (size_t i = 0; i <= this->func_dim - 1; ++i) {
             printf("%10.8f \n", (*this->p_min_glob)[i]);
         }
     } else {
         /* Maximal number of iterations reached */
-        printf("Max number of iterations reached (%d)! Found value %10.8f at coordinates: \n", (int)outer_it, optimal_value);
+        printf("\nMax number of iterations reached (%d)! Found value %10.8f at coordinates: \n", (int)outer_it, optimal_value);
         for (size_t i = 0; i <= this->func_dim - 1; ++i) {
             printf("\t%10.8f \n", (*this->p_min_glob)[i]);
         }
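[review note] The rewritten change_coordinate() first dry-runs the velocity update for every component and, as soon as any component would step outside its box, fully restarts the particle (velocity, r1..r3 and coordinates) instead of the old penalty/compensation damping loop; domain_bounds packs the box as [low_0, high_0, low_1, high_1, ...]. The update itself is the standard PSO rule. A minimal standalone sketch of that rule, with names following the patch (the helper function itself is hypothetical, not part of the patch):

    // inertia + cognitive pull + social pull, cf. Particle::change_coordinate
    double new_velocity(double w, double c1, double c2,
                        double v, double x, double p_best, double g_best,
                        double r1, double r2) {
        return w * v + c1 * r1 * (p_best - x) + c2 * r2 * (g_best - x);
    }

Note that the restart is triggered with break inside the first loop, so a single out-of-bounds component discards the whole particle state.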
diff --git a/src/LearningMethods/ParticleSwarm.h b/src/LearningMethods/ParticleSwarm.h
index 65e058a54a13b2b176a63586ef8f8bca10a7d3b5..c3bdf0fe2a1f1e89e52de88e90ea4593cf529193 100644
--- a/src/LearningMethods/ParticleSwarm.h
+++ b/src/LearningMethods/ParticleSwarm.h
@@ -43,6 +43,11 @@ private:

     double *domain_bounds;

+    void randomize_coordinates();
+
+    void randomize_velocity();
+
+    void randomize_parameters();

 public:

diff --git a/src/NetConnection/ConnectionFunctionGeneral.cpp b/src/NetConnection/ConnectionFunctionGeneral.cpp
index dcbb6e158bfea8de12af434ff84f38e1bcda6ffa..74093c7ebe3a38bd4d0986bd821cfb5cd26c20e0 100644
--- a/src/NetConnection/ConnectionFunctionGeneral.cpp
+++ b/src/NetConnection/ConnectionFunctionGeneral.cpp
@@ -13,24 +13,20 @@
 ConnectionFunctionGeneral::ConnectionFunctionGeneral() {
 }

-ConnectionFunctionGeneral::ConnectionFunctionGeneral(std::vector<double>* w_array, std::vector<unsigned int> &param_indices, std::string &function_string) {
-    this->param_indices = new std::vector<unsigned int>(param_indices);
-    this->weight_array = w_array;
+ConnectionFunctionGeneral::ConnectionFunctionGeneral(std::vector<size_t> &param_indices, std::string &function_string) {
+    this->param_indices = param_indices;
 }

 ConnectionFunctionGeneral::~ConnectionFunctionGeneral() {
-    if(this->param_indices){
-        delete this->param_indices;
-        this->param_indices = nullptr;
-    }
+
 }

-double ConnectionFunctionGeneral::eval() {
+double ConnectionFunctionGeneral::eval( std::vector<double> &parameter_space ) {
     //TODO
     return 0.0;
 }

-void ConnectionFunctionGeneral::eval_partial_derivative(std::vector<double> &weight_gradient, double alpha) {
+void ConnectionFunctionGeneral::eval_partial_derivative(std::vector<double> &parameter_space, std::vector<double> &weight_gradient, double alpha) {
     //TODO
 }
diff --git a/src/NetConnection/ConnectionFunctionGeneral.h b/src/NetConnection/ConnectionFunctionGeneral.h
index 26761cc600801ceedab056988bbef2e024a6ab55..0c4afe0a5b63d7c80b873824baf299eec332c5d9 100644
--- a/src/NetConnection/ConnectionFunctionGeneral.h
+++ b/src/NetConnection/ConnectionFunctionGeneral.h
@@ -13,15 +13,11 @@

 class ConnectionFunctionGeneral {
 protected:
-    /**
-     *
-     */
-    std::vector<double> * weight_array = nullptr;

     /**
      *
      */
-    std::vector<unsigned int> *param_indices = nullptr;
+    std::vector<size_t> param_indices;

 public:

@@ -35,25 +31,25 @@ public:
     * @param param_count
     * @param f
     */
-    ConnectionFunctionGeneral(std::vector<double>* w_array, std::vector<unsigned int> &param_indices, std::string &function_string);
+    ConnectionFunctionGeneral(std::vector<size_t> &param_indices, std::string &function_string);

    /**
    *
    */
-    virtual ~ConnectionFunctionGeneral();
+    virtual ~ConnectionFunctionGeneral( );

    /**
    *
    * @return
    */
-    virtual double eval( );
+    virtual double eval( std::vector<double> &parameter_space );

    /**
    * Performs partial derivative of this transfer function according to all parameters. Adds the values multiplied
    * by alpha to the corresponding gradient vector
    */
-    virtual void eval_partial_derivative(std::vector<double> &weight_gradient, double alpha);
+    virtual void eval_partial_derivative( std::vector<double> &parameter_space, std::vector<double> &weight_gradient, double alpha );

};
diff --git a/src/NetConnection/ConnectionFunctionIdentity.cpp b/src/NetConnection/ConnectionFunctionIdentity.cpp
index 20a1d2de604e2f213e9ae7bf3effddc95d199964..3716ea75dc8ff4beb89e11de357dc1df5339ccd6 100644
--- a/src/NetConnection/ConnectionFunctionIdentity.cpp
+++ b/src/NetConnection/ConnectionFunctionIdentity.cpp
@@ -7,15 +7,29 @@

 #include "ConnectionFunctionIdentity.h"

-ConnectionFunctionIdentity::ConnectionFunctionIdentity(double * val, size_t pidx) {
-    this->value = val;
+ConnectionFunctionIdentity::ConnectionFunctionIdentity( ) {
+    this->is_unitary = true;
+}
+
+ConnectionFunctionIdentity::ConnectionFunctionIdentity( size_t pidx ) {
     this->param_idx = pidx;
+    this->is_unitary = false;
 }

-double ConnectionFunctionIdentity::eval() {
-    return (*this->value);
+double ConnectionFunctionIdentity::eval( std::vector<double> &parameter_space ) {
+
+    if( this->is_unitary ){
+        return 1.0;
+    }
+
+    return parameter_space.at(this->param_idx);
 }

-void ConnectionFunctionIdentity::eval_partial_derivative(std::vector<double> &weight_gradient, double alpha) {
+void ConnectionFunctionIdentity::eval_partial_derivative(std::vector<double> &parameter_space, std::vector<double> &weight_gradient, double alpha) {
+
+    if( this->is_unitary ){
+        return;
+    }
+
     weight_gradient[this->param_idx] += alpha;
 }
\ No newline at end of file
diff --git a/src/NetConnection/ConnectionFunctionIdentity.h b/src/NetConnection/ConnectionFunctionIdentity.h
index 4887ed65d099d2d1a1d6271e700b765ccca70f26..104fc58049a137f240ecabacb3b0d4a7bba6ec5b 100644
--- a/src/NetConnection/ConnectionFunctionIdentity.h
+++ b/src/NetConnection/ConnectionFunctionIdentity.h
@@ -17,27 +17,35 @@ class ConnectionFunctionGeneral;
  */
 class ConnectionFunctionIdentity:public ConnectionFunctionGeneral {
 private:
-    double * value = nullptr;

-    size_t param_idx;
+    size_t param_idx = 0;
+
+    bool is_unitary = false;
+
 public:

     /**
     *
     */
-    ConnectionFunctionIdentity(double * value_ptr, size_t pidx);
+    ConnectionFunctionIdentity( );
+
+    /**
+     *
+     */
+    ConnectionFunctionIdentity( size_t pidx );

     /**
     *
     * @return
     */
-    double eval() override;
+    double eval( std::vector<double> &parameter_space ) override;

     /**
     *
     * @param weight_gradient
     * @param alpha
     */
-    void eval_partial_derivative(std::vector<double> &weight_gradient, double alpha) override;
+    void eval_partial_derivative(std::vector<double> &parameter_space, std::vector<double> &weight_gradient, double alpha) override;

};
diff --git a/src/Network/NeuralNetwork.cpp b/src/Network/NeuralNetwork.cpp
index 2b0558f2078a91029decfa60fa3bfc4412f0354e..99eb85fa96fae079fe05a4c63c1e500d363f0814 100644
--- a/src/Network/NeuralNetwork.cpp
+++ b/src/Network/NeuralNetwork.cpp
@@ -12,48 +12,25 @@
 NeuralNetwork::NeuralNetwork() {
     this->neurons = new std::vector<Neuron*>(0);
-    this->neuron_biases = nullptr;
+    this->neuron_biases = new std::vector<double>(0);
     this->neuron_potentials = new std::vector<double>(0);
+    this->neuron_bias_indices = new std::vector<int>(0);

-    this->connection_weights = nullptr;
+    this->connection_weights = new std::vector<double>(0);
     this->connection_list = new std::vector<ConnectionFunctionGeneral*>(0);
     this->inward_adjacency = new std::vector<std::vector<std::pair<size_t, size_t>>*>(0);
     this->outward_adjacency = new std::vector<std::vector<std::pair<size_t, size_t>>*>(0);
-
     this->neuron_layers_feedforward = new std::vector<std::vector<size_t>*>(0);
     this->neuron_layers_feedbackward = new std::vector<std::vector<size_t>*>(0);

-    this->delete_weights = false;
-    this->delete_biases = false;
-    this->layers_analyzed = false;
-    this->in_out_determined = false;
-
-    this->last_used_bias_idx = 0;
-    this->last_used_weight_idx = 0;
-}
-
-NeuralNetwork::NeuralNetwork(size_t n_connection_weights, size_t n_biases) {
-
-    this->neurons = new std::vector<Neuron*>(0);
-    this->neuron_biases = new std::vector<double>(n_biases);
-    this->neuron_potentials = new std::vector<double>(0);
-
-    this->connection_weights = new std::vector<double>(n_connection_weights);
-    this->connection_list = new std::vector<ConnectionFunctionGeneral*>(0);
-    this->inward_adjacency = new std::vector<std::vector<std::pair<size_t, size_t>>*>(0);
-    this->outward_adjacency = new std::vector<std::vector<std::pair<size_t, size_t>>*>(0);
-
-    this->neuron_layers_feedforward = new std::vector<std::vector<size_t>*>(0);
-    this->neuron_layers_feedbackward = new std::vector<std::vector<size_t>*>(0);
+    this->input_neuron_indices = new std::vector<size_t>(0);
+    this->output_neuron_indices = new std::vector<size_t>(0);

     this->delete_weights = true;
     this->delete_biases = true;
     this->layers_analyzed = false;
     this->in_out_determined = false;
-
-    this->last_used_bias_idx = 0;
-    this->last_used_weight_idx = 0;
 }

 NeuralNetwork::~NeuralNetwork() {
@@ -72,6 +49,11 @@ NeuralNetwork::~NeuralNetwork() {
         this->neuron_potentials = nullptr;
     }

+    if(this->neuron_bias_indices){
+        delete this->neuron_bias_indices;
+        this->neuron_bias_indices = nullptr;
+    }
+
     if(this->output_neuron_indices){
         delete this->output_neuron_indices;
         this->output_neuron_indices = nullptr;
@@ -366,104 +348,52 @@ NeuralNetwork* NeuralNetwork::get_subnet(std::vector<size_t> &input_neuron_indic
     return output_net;
 }

-size_t NeuralNetwork::add_neuron(Neuron *n, int bias_idx) {
-
-    size_t local_b_idx = (size_t)bias_idx;
+size_t NeuralNetwork::add_neuron(Neuron* n, BIAS_TYPE bt, size_t bias_idx) {

-    if(this->neuron_biases->size() <= local_b_idx){
-        std::cerr << "Additional neuron cannot be added! The bias index is too large\n" << std::endl;
-        exit(-1);
+    if( bt == BIAS_TYPE::NO_BIAS ){
+        this->neuron_bias_indices->push_back(-1);
     }
-
-    this->outward_adjacency->push_back(new std::vector<std::pair<size_t, size_t>>(0));
-    this->inward_adjacency->push_back(new std::vector<std::pair<size_t, size_t>>(0));
-
-    this->neurons->push_back(n);
-    n->set_bias( &(this->neuron_biases->at( local_b_idx )) );
-
-    this->in_out_determined = false;
-    this->layers_analyzed = false;
-
-    this->n_neurons++;
-    this->neuron_potentials->resize(this->n_neurons);
-
-    return this->n_neurons - 1;
-}
-
-size_t NeuralNetwork::add_neuron(Neuron *n, BIAS_TYPE bt) {
-
-    size_t local_b_idx = 0;
-
-    if(bt == NOCHANGE){
-        return this->add_neuron_no_bias( n );
+    else if( bt == BIAS_TYPE::NEXT_BIAS ){
+        this->neuron_bias_indices->push_back((int)this->neuron_biases->size());
+        this->neuron_biases->resize(this->neuron_biases->size() + 1);
     }
-
-    local_b_idx = this->last_used_bias_idx;
-
-    if(this->neuron_biases->size() <= local_b_idx){
-        std::cerr << "Additional neuron cannot be added! The bias index is too large\n" << std::endl;
-        exit(-1);
+    else if( bt == BIAS_TYPE::EXISTING_BIAS ){
+        if( bias_idx >= this->neuron_biases->size()){
+            std::cerr << "The supplied bias index is too large!\n" << std::endl;
+        }
+        this->neuron_bias_indices->push_back((int)bias_idx);
     }

     this->outward_adjacency->push_back(new std::vector<std::pair<size_t, size_t>>(0));
     this->inward_adjacency->push_back(new std::vector<std::pair<size_t, size_t>>(0));

-    this->neurons->push_back(n);
-    n->set_bias( &(this->neuron_biases->at( local_b_idx )) );
-
-    this->in_out_determined = false;
-    this->layers_analyzed = false;
-
-    this->n_neurons++;
-    this->last_used_bias_idx++;
-    this->neuron_potentials->resize(this->n_neurons);
-
-    return this->n_neurons - 1;
-}
-
-size_t NeuralNetwork::add_neuron_no_bias(Neuron *n) {
-
-    this->outward_adjacency->push_back(new std::vector<std::pair<size_t, size_t>>(0));
-    this->inward_adjacency->push_back(new std::vector<std::pair<size_t, size_t>>(0));
-
     this->neurons->push_back(n);

     this->in_out_determined = false;
     this->layers_analyzed = false;
-
-    this->n_neurons++;
-    this->neuron_potentials->resize(this->n_neurons);
-
-    return this->n_neurons - 1;
+    return this->neurons->size() - 1;
 }

-size_t NeuralNetwork::add_connection_simple(size_t n1_idx, size_t n2_idx) {
-    return add_connection_simple(n1_idx, n2_idx, -1);
-}
-
-size_t NeuralNetwork::add_connection_simple(size_t n1_idx, size_t n2_idx, int weight_idx) {
-    return add_connection_simple(n1_idx, n2_idx, weight_idx, 1);
-}
+size_t NeuralNetwork::add_connection_simple( size_t n1_idx, size_t n2_idx, SIMPLE_CONNECTION_TYPE sct, size_t weight_idx ) {

-size_t NeuralNetwork::add_connection_simple(size_t n1_idx, size_t n2_idx, int weight_idx, double weight_value) {
-
-    size_t local_w_idx = 0;
-    if(weight_idx < 0){
-        local_w_idx = this->last_used_weight_idx;
+    ConnectionFunctionIdentity *con_weight_u1u2;
+    if( sct == SIMPLE_CONNECTION_TYPE::UNITARY_WEIGHT ){
+        con_weight_u1u2 = new ConnectionFunctionIdentity( );
     }
     else{
-        local_w_idx = (size_t)weight_idx;
-    }
+        if( sct == SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT ){
+            weight_idx = this->connection_weights->size();
+            this->connection_weights->resize(weight_idx + 1);
+        }
+        else if( sct == SIMPLE_CONNECTION_TYPE::EXISTING_WEIGHT ){
+            if( weight_idx >= this->connection_weights->size()){
+                std::cerr << "The supplied connection weight index is too large!\n" << std::endl;
+            }
+        }

-    if(this->connection_weights->size() <= local_w_idx){
-        std::cerr << "Additional connection cannot be added! Number of connections is saturated\n" << std::endl;
-        exit(-1);
+        con_weight_u1u2 = new ConnectionFunctionIdentity( weight_idx );
     }
-    this->connection_weights->at(local_w_idx) = weight_value;
-    this->last_used_weight_idx++;

-    ConnectionFunctionIdentity *con_weight_u1u2 = new ConnectionFunctionIdentity( &(this->connection_weights->at(local_w_idx)), local_w_idx );
     size_t conn_idx = this->add_new_connection_to_list(con_weight_u1u2);

     this->add_outward_connection(n1_idx, n2_idx, conn_idx);
@@ -515,25 +445,24 @@ void NeuralNetwork::set_parameter_space_pointers(NeuralNetwork &parent_network)
 }

 void NeuralNetwork::eval_single(std::vector<double> &input, std::vector<double> &output, std::vector<double> * custom_weights_and_biases) {
-    if(!this->in_out_determined && this->n_inputs * this->n_outputs <= 0){
+    if(!this->in_out_determined || (this->input_neuron_indices->size() * this->output_neuron_indices->size()) <= 0){
         std::cerr << "Input and output neurons have not been specified\n" << std::endl;
         exit(-1);
     }

-    if(this->n_inputs != input.size()){
+    if(this->input_neuron_indices->size() != input.size()){
         std::cerr << "Error, input size != Network input size\n" << std::endl;
         exit(-1);
     }

-    if(this->n_outputs != output.size()){
+    if(this->output_neuron_indices->size() != output.size()){
         std::cerr << "Error, output size != Network output size\n" << std::endl;
         exit(-1);
     }

     this->copy_parameter_space( custom_weights_and_biases );

-
     this->analyze_layer_structure();

     /* reset of the output and the neuron potentials */
@@ -541,92 +470,92 @@ void NeuralNetwork::eval_single(std::vector<double> &input, std::vector<double>
     std::fill(this->neuron_potentials->begin(), this->neuron_potentials->end(), 0.0);

     /* set the potentials of the input neurons */
-    for(unsigned int i = 0; i < this->n_inputs; ++i){
+    for(size_t i = 0; i < this->input_neuron_indices->size(); ++i){
         this->neuron_potentials->at( this->input_neuron_indices->at(i) ) = input[ i ];
     }

-    /* we iterate through all the feedforward layers and transfer the signals */
-    double pot;
+    /* we iterate through all the feed-forward layers and transfer the signals */
+    double potential, bias;
+    int bias_idx;
     for( auto layer: *this->neuron_layers_feedforward){
         /* we iterate through all neurons in this layer and propagate the signal to the neighboring neurons */

         for( auto si: *layer ){
-            pot = this->neurons->at(si)->activate(this->neuron_potentials->at( si ));
+            bias = 0.0;
+            bias_idx = this->neuron_bias_indices->at( si );
+            if( bias_idx >= 0 ){
+                bias = this->neuron_biases->at( bias_idx );
+            }
+            potential = this->neurons->at(si)->activate(this->neuron_potentials->at( si ), bias);

             for(auto c: *this->outward_adjacency->at( si )){
                 size_t ti = c.first;
                 size_t ci = c.second;

-                this->neuron_potentials->at( ti ) += this->connection_list->at( ci )->eval() * pot;
+                this->neuron_potentials->at( ti ) += this->connection_list->at( ci )->eval( *this->connection_weights ) * potential;
             }
         }
     }

     unsigned int i = 0;
     for(auto oi: *this->output_neuron_indices){
-        output[i] = this->neurons->at( oi )->activate( this->neuron_potentials->at( oi ) );
+        bias = 0.0;
+        bias_idx = this->neuron_bias_indices->at( oi );
+        if( bias_idx >= 0 ){
+            bias = this->neuron_biases->at( bias_idx );
+        }
+        output[i] = this->neurons->at( oi )->activate( this->neuron_potentials->at( oi ), bias );
         ++i;
     }
 }

 void NeuralNetwork::randomize_weights( ) {

-    if( this->last_used_weight_idx <= 0 ){
-
-        return;
-    }
-
     boost::random::mt19937 gen;

     // Init weight guess ("optimal" for logistic activation functions)
-    double r = 4 * sqrt(6./(this->last_used_weight_idx));
+    double r = 4 * sqrt(6./(this->connection_weights->size()));

     boost::random::uniform_real_distribution<> dist(-r, r);

-    for(size_t i = 0; i < this->last_used_weight_idx; i++) {
+    for(size_t i = 0; i < this->connection_weights->size(); i++) {
         this->connection_weights->at(i) = dist(gen);
     }
 }

 void NeuralNetwork::randomize_biases( ) {

-    if( this->last_used_bias_idx <= 0 ){
-        return;
-    }
-
     boost::random::mt19937 gen;

     // Init weight guess ("optimal" for logistic activation functions)
     boost::random::uniform_real_distribution<> dist(-1, 1);
-    for(size_t i = 0; i < this->last_used_bias_idx; i++) {
+    for(size_t i = 0; i < this->neuron_biases->size(); i++) {
         this->neuron_biases->at(i) = dist(gen);
     }
 }

 size_t NeuralNetwork::get_n_inputs() {
-    return this->n_inputs;
+    return this->input_neuron_indices->size();
 }

 size_t NeuralNetwork::get_n_outputs() {
-    return this->n_outputs;
+    return this->output_neuron_indices->size();
 }

 size_t NeuralNetwork::get_n_weights() {
-    if(!this->connection_weights){
-        return 0;
-    }
     return this->connection_weights->size();
 }

 size_t NeuralNetwork::get_n_biases() {
-    if(!this->neuron_biases){
-        return 0;
-    }
     return this->neuron_biases->size();
 }

+size_t NeuralNetwork::get_neuron_bias_index(size_t neuron_idx) {
+    return this->neuron_bias_indices->at( neuron_idx );
+}
+
 size_t NeuralNetwork::get_n_neurons() {
-    return this->n_neurons;
+    return this->neurons->size();
 }

 void NeuralNetwork::specify_input_neurons(std::vector<size_t> &input_neurons_indices) {
@@ -637,7 +566,6 @@ void NeuralNetwork::specify_input_neurons(std::vector<size_t> &input_neurons_ind
         delete this->input_neuron_indices;
         this->input_neuron_indices = new std::vector<size_t>(input_neurons_indices);
     }
-    this->n_inputs = input_neurons_indices.size();
 }

 void NeuralNetwork::specify_output_neurons(std::vector<size_t> &output_neurons_indices) {
@@ -648,7 +576,6 @@ void NeuralNetwork::specify_output_neurons(std::vector<size_t> &output_neurons_i
         delete this->output_neuron_indices;
         this->output_neuron_indices = new std::vector<size_t>(output_neurons_indices);
     }
-    this->n_outputs = output_neurons_indices.size();
 }

 void NeuralNetwork::print_weights() {
@@ -664,7 +591,7 @@ void NeuralNetwork::print_weights() {
 }

 void NeuralNetwork::print_stats(){
-    printf("Number of neurons: %d, number of active weights: %d, number of active biases: %d\n", (int)this->neurons->size(), (int)this->last_used_weight_idx, (int)this->last_used_bias_idx);
+    printf("Number of neurons: %d, number of active weights: %d, number of active biases: %d\n", (int)this->neurons->size(), (int)this->connection_weights->size(), (int)this->neuron_biases->size());
 }

 std::vector<double>* NeuralNetwork::get_parameter_ptr_biases() {
@@ -701,6 +628,9 @@ void NeuralNetwork::analyze_layer_structure() {
         return;
     }

+    /* buffer preparation */
+    this->neuron_potentials->resize(this->get_n_neurons());
+
     /* space allocation */
     if(this->neuron_layers_feedforward){
         for(auto e: *this->neuron_layers_feedforward){
@@ -751,9 +681,9 @@ void NeuralNetwork::analyze_layer_structure() {

     size_t idx1 = 0, idx2 = 1;

-    active_set_size[0] = this->n_inputs;
+    active_set_size[0] = this->get_n_inputs();
     size_t i = 0;
-    for(i = 0; i < this->n_inputs; ++i){
+    for(i = 0; i < this->get_n_inputs(); ++i){
         active_eval_set[i] = this->input_neuron_indices->at(i);
     }

@@ -797,8 +727,8 @@ void NeuralNetwork::analyze_layer_structure() {
     idx1 = 0;
     idx2 = 1;

-    active_set_size[0] = this->n_outputs;
-    for(i = 0; i < this->n_outputs; ++i){
+    active_set_size[0] = this->get_n_outputs();
+    for(i = 0; i < this->get_n_outputs(); ++i){
         active_eval_set[i] = this->output_neuron_indices->at(i);
     }

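[review note] With BIAS_TYPE and SIMPLE_CONNECTION_TYPE the caller no longer pre-allocates parameter storage; biases and weights grow on demand, and neurons and connections only keep indices into the shared vectors. A minimal sketch of the intended call sequence (it mirrors the construction code in DESolver below and uses only classes and signatures from this patch; the surrounding includes are assumed):

    NeuralNetwork net;
    size_t in  = net.add_neuron(new NeuronLinear(),   BIAS_TYPE::NO_BIAS);    // bias index -1
    size_t hid = net.add_neuron(new NeuronLogistic(), BIAS_TYPE::NEXT_BIAS);  // allocates bias 0
    size_t out = net.add_neuron(new NeuronLinear(),   BIAS_TYPE::NO_BIAS);

    net.add_connection_simple(in,  hid, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT); // allocates weight 0
    net.add_connection_simple(hid, out, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT); // allocates weight 1

    std::vector<size_t> inputs = {in}, outputs = {out};
    net.specify_input_neurons(inputs);
    net.specify_output_neurons(outputs);

One observation: for EXISTING_BIAS and EXISTING_WEIGHT an out-of-range index only prints to std::cerr and then proceeds; restoring the old exit(-1) (or throwing) there would avoid a later out-of-bounds access.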
diff --git a/src/Network/NeuralNetwork.h b/src/Network/NeuralNetwork.h
index dca2d9b760f3b715e144dc04f41d660b55affea1..fb9918f0836907856d608affda2cf50d0d7f9ca3 100644
--- a/src/Network/NeuralNetwork.h
+++ b/src/Network/NeuralNetwork.h
@@ -21,7 +21,9 @@

 enum NET_TYPE{GENERAL};

-enum BIAS_TYPE{NEXT, NOCHANGE};
+enum BIAS_TYPE{NEXT_BIAS, NO_BIAS, EXISTING_BIAS};
+
+enum SIMPLE_CONNECTION_TYPE{NEXT_WEIGHT, UNITARY_WEIGHT, EXISTING_WEIGHT};


 /**
@@ -59,6 +61,11 @@ private:
      */
     std::vector<double>* neuron_biases = nullptr;

+    /**
+     *
+     */
+    std::vector<int>* neuron_bias_indices = nullptr;
+
     /**
      *
      */
@@ -89,31 +96,6 @@ private:
      */
     std::vector<std::vector<size_t>*> *neuron_layers_feedbackward = nullptr;

-    /**
-     *
-     */
-    size_t n_inputs = 0;
-
-    /**
-     *
-     */
-    size_t n_outputs = 0;
-
-    /**
-     *
-     */
-    size_t last_used_weight_idx = 0;
-
-    /**
-     *
-     */
-    size_t last_used_bias_idx = 0;
-
-    /**
-     *
-     */
-    size_t n_neurons = 0;
-
     /**
      *
      */
@@ -171,12 +153,6 @@ public:
      */
     NeuralNetwork();

-    /**
-     *
-     */
-    NeuralNetwork(size_t n_connection_weights, size_t n_neurons);
-
-
     /**
      *
      */
@@ -220,22 +196,7 @@ public:
     * @param[in] n
     * @return
     */
-    size_t add_neuron(Neuron* n, int bias_idx );
-
-    /**
-     * Adds a new neuron to the list of neurons. Also assigns a valid bias value to its activation function
-     * @param[in] n
-     * @return
-     */
-    size_t add_neuron(Neuron* n, BIAS_TYPE bt = NOCHANGE );
-
-    /**
-     * Adds a new neuron to this network, does not touch its bias.
-     * @param n
-     * @return
-     */
-    //TODO reformulate to use add_neuron(, BIAS_TYPE)
-    size_t add_neuron_no_bias(Neuron *n);
+    size_t add_neuron(Neuron* n, BIAS_TYPE bt = NEXT_BIAS, size_t bias_idx = 0);

     /**
     *
@@ -243,26 +204,7 @@ public:
     * @param n2_idx
     * @return
     */
-    size_t add_connection_simple(size_t n1_idx, size_t n2_idx);
-
-    /**
-     *
-     * @param n1_idx
-     * @param n2_idx
-     * @param weight_idx
-     * @return
-     */
-    size_t add_connection_simple(size_t n1_idx, size_t n2_idx, int weight_idx);
-
-    /**
-     *
-     * @param n1_idx
-     * @param n2_idx
-     * @param weight_idx
-     * @param weight_value
-     * @return
-     */
-    size_t add_connection_simple(size_t n1_idx, size_t n2_idx, int weight_idx, double weight_value);
+    size_t add_connection_simple(size_t n1_idx, size_t n2_idx, SIMPLE_CONNECTION_TYPE sct = NEXT_WEIGHT, size_t weight_idx = 0 );

     /**
      * Take the existing connection with index 'connection_idx' in 'parent_network' and adds it to the structure of this
@@ -309,6 +251,12 @@ public:
      */
     virtual size_t get_n_biases();

+    /**
+     *
+     * @return
+     */
+    virtual size_t get_neuron_bias_index( size_t neuron_idx );
+
     /**
      *
      * @return
diff --git a/src/Neuron/Neuron.cpp b/src/Neuron/Neuron.cpp
index c905e2907e50eb776c28c887cae9242791fd7b9f..fc7dc78db4bda1326fce329aab8add4005a54da5 100644
--- a/src/Neuron/Neuron.cpp
+++ b/src/Neuron/Neuron.cpp
@@ -4,11 +4,6 @@
 Neuron::~Neuron() {

 }
-
-void Neuron::set_bias(double *b) {
-    this->bias = b;
-}
-
//template<class Archive>
//void Neuron::serialize(Archive & ar, const unsigned int version) {
//    ar << this->potential;
diff --git a/src/Neuron/Neuron.h b/src/Neuron/Neuron.h
index 91540ac39d482c0c00d936dd22fbfea5eac2b4e3..6e098a85858a5e7ef54fc4a497fb5c39c51b37ec 100644
--- a/src/Neuron/Neuron.h
+++ b/src/Neuron/Neuron.h
@@ -22,10 +22,6 @@ class Neuron {
     friend class boost::serialization::access;

 protected:
-    /**
-     *
-     */
-    double * bias;

     template<class Archive>
     void serialize(Archive & ar, const unsigned int version){
@@ -50,16 +46,7 @@ public:
     /**
      * Performs the activation function and returns the result
      */
-    virtual double activate( double x ) = 0;
-
-    /**
-     *
-     * @param b
-     */
-    virtual void set_bias( double * b = nullptr );
-
-
-
+    virtual double activate( double x, double b ) = 0;

 }; /* end of Neuron class */

@@ -74,14 +61,14 @@ class IDifferentiable {
     * Calculates the derivative with respect to the argument, ie the 'potential'
     * @return f'(x), where 'f(x)' is the activation function and 'x' = 'potential'
     */
-    virtual double activation_function_eval_derivative( double x ) = 0;
+    virtual double activation_function_eval_derivative( double x, double b ) = 0;

    /**
    * Calculates the derivative with respect to the bias
    * @return d/db f'(x), where 'f(x)' is the activation function, 'x' is the 'potential'
    * and 'b' is the bias
    */
-    virtual double activation_function_eval_derivative_bias( double x ) = 0;
+    virtual double activation_function_eval_derivative_bias( double x, double b ) = 0;

    /**
    * Returns a Neuron pointer object with activation function being the partial derivative of
diff --git a/src/Neuron/NeuronBinary.cpp b/src/Neuron/NeuronBinary.cpp
index faad77d5a8e1c4524cbef168c40cb592b49f6743..4ccb178bcf0d0c81945c83c16f71dfd5a207d935 100644
--- a/src/Neuron/NeuronBinary.cpp
+++ b/src/Neuron/NeuronBinary.cpp
@@ -4,18 +4,11 @@

 #include "NeuronBinary.h"

-NeuronBinary::NeuronBinary( double * b ) {
-
-    this->bias = b;
+NeuronBinary::NeuronBinary( ) {

 }

-double NeuronBinary::activate( double x ) {
-
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }
+double NeuronBinary::activate( double x, double b ) {

     if(x >= b){
         return 1.0;
diff --git a/src/Neuron/NeuronBinary.h b/src/Neuron/NeuronBinary.h
index 5c33c05ec1532f581b6e986aa4d330b5275f33b1..9e2d116160ad0b97ba8230f4333e4e907523d303 100644
--- a/src/Neuron/NeuronBinary.h
+++ b/src/Neuron/NeuronBinary.h
@@ -32,12 +32,12 @@ public:
     * @param[in] threshold Denotes when the neuron is activated
     * When neuron potential exceeds 'threshold' value it becomes excited
     */
-    explicit NeuronBinary( double * b = nullptr );
+    explicit NeuronBinary( );

     /**
      * Performs the activation function and stores the result into the 'state' property
      */
-    double activate( double x ) override;
+    double activate( double x, double b ) override;

 };
diff --git a/src/Neuron/NeuronConstant.cpp b/src/Neuron/NeuronConstant.cpp
index 1fcacde80cfe059ad04ede291fffb815cd312e74..a09fe8c561453229b0b3129cac431b8e3dc883c1 100644
--- a/src/Neuron/NeuronConstant.cpp
+++ b/src/Neuron/NeuronConstant.cpp
@@ -13,15 +13,15 @@ NeuronConstant::NeuronConstant( double c ) {

 }

-double NeuronConstant::activate( double x ) {
+double NeuronConstant::activate( double x, double b ) {
     return this->p;
 }

-double NeuronConstant::activation_function_eval_derivative_bias( double x ) {
+double NeuronConstant::activation_function_eval_derivative_bias( double x, double b ) {
     return 0.0;
 }

-double NeuronConstant::activation_function_eval_derivative( double x ) {
+double NeuronConstant::activation_function_eval_derivative( double x, double b ) {
     return 0.0;
 }
diff --git a/src/Neuron/NeuronConstant.h b/src/Neuron/NeuronConstant.h
index 8da88c2c5dd97b2b4dc9645f4d240add780a09af..f87d5a608d18671661b9be6b156aa7fd5032cdca 100644
--- a/src/Neuron/NeuronConstant.h
+++ b/src/Neuron/NeuronConstant.h
@@ -33,7 +33,7 @@ public:
     /**
      * Evaluates and returns 'c'
     */
-    double activate( double x ) override;
+    double activate( double x, double b ) override;

     /**
      * Calculates the partial derivative of the activation function
@@ -41,13 +41,13 @@ public:
     * @return Partial derivative of the activation function according to the
     * 'bias' parameter. Returns 0.0
    */
-    double activation_function_eval_derivative_bias( double x ) override;
+    double activation_function_eval_derivative_bias( double x, double b ) override;

    /**
    * Calculates d/dx of (c) at point x
    * @return 0.0
    */
-    double activation_function_eval_derivative( double x ) override;
+    double activation_function_eval_derivative( double x, double b ) override;

    /**
    * Returns a pointer to a Neuron with derivative as its activation function
diff --git a/src/Neuron/NeuronLinear.cpp b/src/Neuron/NeuronLinear.cpp
index f83bc12e0ab3be80c44fe20ca54c1fec99589934..71ad80fd42825abd1be54d6433653c4f4a6ba9ce 100644
--- a/src/Neuron/NeuronLinear.cpp
+++ b/src/Neuron/NeuronLinear.cpp
@@ -6,26 +6,21 @@



-NeuronLinear::NeuronLinear( double * b ) {
+NeuronLinear::NeuronLinear( ) {

-    this->bias = b;
 }

-double NeuronLinear::activate( double x ) {
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }
+double NeuronLinear::activate( double x, double b ) {

     return x + b;
 }

-double NeuronLinear::activation_function_eval_derivative_bias( double x ) {
+double NeuronLinear::activation_function_eval_derivative_bias( double x, double b ) {
     return 1.0;
 }

-double NeuronLinear::activation_function_eval_derivative( double x ) {
+double NeuronLinear::activation_function_eval_derivative( double x, double b ) {
     return 1.0;
 }
diff --git a/src/Neuron/NeuronLinear.h b/src/Neuron/NeuronLinear.h
index 40289aa6ca9301384348e3330b53070d6fce0775..bd5d8c42f26bce108eba26c39fedd690d9e58c1a 100644
--- a/src/Neuron/NeuronLinear.h
+++ b/src/Neuron/NeuronLinear.h
@@ -36,12 +36,12 @@ public:
     * f(x) = x + b
     * @param[in] b Bias
     */
-    explicit NeuronLinear( double * b = nullptr );
+    explicit NeuronLinear( );

     /**
      * Evaluates 'x + b' and stores the result into the 'state' property
      */
-    double activate( double x ) override;
+    double activate( double x, double b ) override;

     /**
      * Calculates the partial derivative of the activation function
@@ -49,13 +49,13 @@ public:
     * @return Partial derivative of the activation function according to the
     * 'bias' parameter. Returns 1.0
    */
-    double activation_function_eval_derivative_bias( double x ) override;
+    double activation_function_eval_derivative_bias( double x, double b ) override;

    /**
    * Calculates d/dx of (x + b) at point x
    * @return 1.0
    */
-    double activation_function_eval_derivative( double x ) override;
+    double activation_function_eval_derivative( double x, double b ) override;

    /**
    * Returns a pointer to a Neuron with derivative as its activation function
diff --git a/src/Neuron/NeuronLogistic.cpp b/src/Neuron/NeuronLogistic.cpp
index 7dbd9d71c05e828c8d45e763026647f1db88722c..7d8075bc3298fb8fabe387e99e80479d28f6e381 100644
--- a/src/Neuron/NeuronLogistic.cpp
+++ b/src/Neuron/NeuronLogistic.cpp
@@ -5,39 +5,34 @@

 #include "NeuronLogistic.h"

-NeuronLogistic_d2::NeuronLogistic_d2( double * b ) {
-    this->bias = b;
+NeuronLogistic_d2::NeuronLogistic_d2( ) {
+
 }

-double NeuronLogistic_d2::activate( double x ) {
+double NeuronLogistic_d2::activate( double x, double b ) {
     //(e^(b + x) (e^b - e^x))/(e^b + e^x)^3
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }

     double ex = std::pow(E, x);
     double eb = std::pow(E, b);
+    double denom = (eb + ex);

-    return (eb*ex*(eb - ex))/((eb + ex)*(eb + ex)*(eb + ex));
+    return (eb*ex*(eb - ex))/(denom*denom*denom);
 }

-double NeuronLogistic_d2::activation_function_eval_derivative_bias( double x ) {
+double NeuronLogistic_d2::activation_function_eval_derivative_bias( double x, double b ) {
     //-(e^(b + x) (-4 e^(b + x) + e^(2 b) + e^(2 x)))/(e^b + e^x)^4
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }

     double eb = std::pow(E, b);
     double ex = std::pow(E, x);
+    double ebex = eb * ex;
+    double denom = (eb + ex);

-    return -(eb*ex*(-4*eb*ex + eb*eb +ex*ex))/((eb + ex)*(eb + ex)*(eb + ex)*(eb + ex));
+    return -(ebex*(-4*ebex + eb*eb +ex*ex))/(denom*denom*denom*denom);
 }

-double NeuronLogistic_d2::activation_function_eval_derivative( double x ) {
+double NeuronLogistic_d2::activation_function_eval_derivative( double x, double b ) {
     //(e^(b + x) (-4 e^(b + x) + e^(2 b) + e^(2 x)))/(e^b + e^x)^4
-    return -this->activation_function_eval_derivative_bias( x );
+    return -this->activation_function_eval_derivative_bias( x, b );
 }

 NeuronLogistic* NeuronLogistic_d2::get_derivative() {
@@ -45,88 +40,76 @@ NeuronLogistic* NeuronLogistic_d2::get_derivative() {
     return nullptr;
 }

-NeuronLogistic_d1::NeuronLogistic_d1( double * b ) {
-    this->bias = b;
+NeuronLogistic_d1::NeuronLogistic_d1( ) {
+
 }

-double NeuronLogistic_d1::activate( double x ) {
+double NeuronLogistic_d1::activate( double x, double b ) {
     //e^(b - x)/(e^(b - x) + 1)^2
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }
+
     double ex = std::pow(E, x);
     double eb = std::pow(E, b);
     double d = (eb/ex);
+    double denom = (d + 1);

-    return d/((d + 1)*(d + 1));
+    return d/(denom*denom);
 }

-double NeuronLogistic_d1::activation_function_eval_derivative_bias( double x ) {
+double NeuronLogistic_d1::activation_function_eval_derivative_bias( double x, double b ) {
     //(e^(b + x) (e^x - e^b))/(e^b + e^x)^3
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }

     double ex = std::pow(E, x);
     double eb = std::pow(E, b);
+    double denom = (eb + ex);

-    return (eb*ex* (ex - eb))/((eb + ex)*(eb + ex)*(eb + ex));
+    return (eb*ex* (ex - eb))/(denom*denom*denom);
 }

-double NeuronLogistic_d1::activation_function_eval_derivative( double x ) {
+double NeuronLogistic_d1::activation_function_eval_derivative( double x, double b ) {
     //(e^(b + x) (e^b - e^x))/(e^b + e^x)^3
-    return -this->activation_function_eval_derivative_bias( x );
+    return -this->activation_function_eval_derivative_bias( x, b );
 }

 NeuronLogistic* NeuronLogistic_d1::get_derivative( ) {
     //(e^(b + x) (e^b - e^x))/(e^b + e^x)^3
     NeuronLogistic_d2* output = nullptr;

-    output = new NeuronLogistic_d2( this->bias );
+    output = new NeuronLogistic_d2( );

     return output;
 }

-NeuronLogistic::NeuronLogistic( double * b ) {
-    this->bias = b;
+NeuronLogistic::NeuronLogistic( ) {
+
 }

-double NeuronLogistic::activate( double x ) {
+double NeuronLogistic::activate( double x, double b ) {
     //(1 + e^(-x + b))^(-1)
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }

     double ex = std::pow(E, b - x);
-    return std::pow(1.0 + ex, -1);
+    return 1.0 / (1.0 + ex);
 }

-double NeuronLogistic::activation_function_eval_derivative_bias( double x ) {
+double NeuronLogistic::activation_function_eval_derivative_bias( double x, double b ) {
     //-e^(b - x)/(e^(b - x) + 1)^2
-    double b = 0.0;
-    if( this->bias ){
-        b = *this->bias;
-    }
+
     double ex = std::pow(E, b - x);
+    double denom = (ex + 1);

-    return -ex/((ex + 1)*(ex + 1));
+    return -ex/(denom*denom);
 }

-double NeuronLogistic::activation_function_eval_derivative( double x ) {
+double NeuronLogistic::activation_function_eval_derivative( double x, double b ) {
     //e^(b - x)/(e^(b - x) + 1)^2
-    return -this->activation_function_eval_derivative_bias( x );
+    return -this->activation_function_eval_derivative_bias( x, b );
 }

 NeuronLogistic* NeuronLogistic::get_derivative( ) {

     NeuronLogistic_d1 *output = nullptr;

-    output = new NeuronLogistic_d1( this->bias );
+    output = new NeuronLogistic_d1( );

     return output;
diff --git a/src/Neuron/NeuronLogistic.h b/src/Neuron/NeuronLogistic.h
index af66b545e3f693cfec1541d537e57aa623abf26d..9eaa3e2bda01667c983f022aad3a35323f2e33fb 100644
--- a/src/Neuron/NeuronLogistic.h
+++ b/src/Neuron/NeuronLogistic.h
@@ -30,12 +30,12 @@ public:
     * Constructs the object of the Logistic neuron with activation function
     * f(x) = (1 + e^(-x + b))^(-1)
     */
-    explicit NeuronLogistic( double * b = nullptr );
+    explicit NeuronLogistic( );

     /**
      * Evaluates '(1 + e^(-x + b))^(-1)' and stores the result into the 'state' property
      */
-    virtual double activate( double x ) override;
+    virtual double activate( double x, double b ) override;

     /**
      * Calculates the partial derivative of the activation function
@@ -43,12 +43,12 @@ public:
     * @return Partial derivative of the activation function according to the
     * bias, returns: -e^(b - x)/(e^(b - x) + 1)^2
     */
-    virtual double activation_function_eval_derivative_bias( double x ) override;
+    virtual double activation_function_eval_derivative_bias( double x, double b ) override;

     /**
      * Calculates d/dx of (1 + e^(-x + b))^(-1)
      * @return e^(b - x)/(e^(b - x) + 1)^2
      */
-    virtual double activation_function_eval_derivative( double x ) override;
+    virtual double activation_function_eval_derivative( double x, double b ) override;

     /**
      * Returns a pointer to a Neuron with derivative as its activation function
@@ -73,12 +73,12 @@ public:
     * f(x) = e^(b - x)/(e^(b - x) + 1)^2
     * @param[in] b Bias
     */
-    explicit NeuronLogistic_d1( double * b = nullptr );
+    explicit NeuronLogistic_d1( );

     /**
      * Evaluates 'e^(b - x)/(e^(b - x) + 1)^2' and returns the result
      */
-    virtual double activate( double x ) override;
+    virtual double activate( double x, double b ) override;

     /**
      * Calculates the partial derivative of the activation function
@@ -86,13 +86,13 @@ public:
     * @return Partial derivative of the activation function according to the
     * bias, returns: (e^(b + x) (e^x - e^b))/(e^b + e^x)^3
     */
-    virtual double activation_function_eval_derivative_bias( double x ) override;
+    virtual double activation_function_eval_derivative_bias( double x, double b ) override;

    /**
    * Calculates d/dx of e^(b - x)*(1 + e^(b - x))^(-2)
    * @return (e^(b + x) (e^b - e^x))/(e^b + e^x)^3
    */
-    virtual double activation_function_eval_derivative( double x ) override;
+    virtual double activation_function_eval_derivative( double x, double b ) override;

    /**
    * Returns a pointer to a Neuron with derivative as its activation function
@@ -119,12 +119,12 @@ public:
     * Constructs the object of the Logistic neuron with activation function
     * f(x) = (e^(b + x) (e^b - e^x))/(e^b + e^x)^3
     */
-    explicit NeuronLogistic_d2( double * b = nullptr );
+    explicit NeuronLogistic_d2( );

     /**
      * Evaluates '(e^(b + x) (e^b - e^x))/(e^b + e^x)^3' and returns the result
      */
-    virtual double activate( double x ) override;
+    virtual double activate( double x, double b ) override;

     /**
      * Calculates the partial derivative of the activation function
@@ -132,13 +132,13 @@ public:
     * @return Partial derivative of the activation function according to the
     * bias, returns: -(e^(b + x) (-4 e^(b + x) + e^(2 b) + e^(2 x)))/(e^b + e^x)^4
     */
-    virtual double activation_function_eval_derivative_bias( double x ) override;
+    virtual double activation_function_eval_derivative_bias( double x, double b ) override;

    /**
    * Calculates d/dx of (e^(b + x) (e^b - e^x))/(e^b + e^x)^3
    * @return (e^(b + x) (-4 e^(b + x) + e^(2 b) + e^(2 x)))/(e^b + e^x)^4
    */
-    virtual double activation_function_eval_derivative( double x ) override;
+    virtual double activation_function_eval_derivative( double x, double b ) override;

    /**
     *
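[review note] Moving the bias from a per-neuron double* to an activate(x, b) argument makes every Neuron instance stateless with respect to parameters, so one object can in principle be shared between networks; the owning NeuralNetwork resolves the bias by neuron index at evaluation time. The dispatch in eval_single reduces to this pattern (sketch copied from the patched loop, local names as in the patch):

    double bias = 0.0;
    int bias_idx = neuron_bias_indices->at(si);       // -1 encodes BIAS_TYPE::NO_BIAS
    if (bias_idx >= 0) bias = neuron_biases->at(bias_idx);
    double potential = neurons->at(si)->activate(neuron_potentials->at(si), bias);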
diff --git a/src/Solvers/DESolver.cpp b/src/Solvers/DESolver.cpp
index 562c27755bc4dea4367646113db79e8c39bc8892..e4a2d75fa971583ef0a1797577290779cd07f5fb 100644
--- a/src/Solvers/DESolver.cpp
+++ b/src/Solvers/DESolver.cpp
@@ -72,7 +72,7 @@ DESolver::DESolver( size_t n_equations, size_t n_inputs, size_t m ) {
     this->dim_inn= m;
     this->n_equations = n_equations;

-    this->solution = new NeuralNetwork( (n_inputs + 1) * m, m );
+    this->solution = new NeuralNetwork( );

     this->solution_inner_neurons = new std::vector<NeuronLogistic*>(0);
     this->solution_inner_neurons->reserve( m );
@@ -82,7 +82,7 @@ DESolver::DESolver( size_t n_equations, size_t n_inputs, size_t m ) {
     size_t idx;
     for( size_t i = 0; i < this->dim_i; ++i ){
         NeuronLinear *input_i = new NeuronLinear( );  //f(x) = x
-        idx = this->solution->add_neuron_no_bias( input_i );
+        idx = this->solution->add_neuron( input_i, BIAS_TYPE::NO_BIAS );
         input_set[i] = idx;
     }
     this->solution->specify_input_neurons( input_set );
@@ -90,7 +90,7 @@ DESolver::DESolver( size_t n_equations, size_t n_inputs, size_t m ) {

     /* output neuron */
     std::vector<size_t> output_set( 1 );
-    idx = this->solution->add_neuron_no_bias( new NeuronLinear( ) );//f(x) = x
+    idx = this->solution->add_neuron( new NeuronLinear( ), BIAS_TYPE::NO_BIAS );//f(x) = x
     output_set[0] = idx;
     this->solution->specify_output_neurons( output_set );
     size_t first_output_neuron = idx;
@@ -100,7 +100,7 @@ DESolver::DESolver( size_t n_equations, size_t n_inputs, size_t m ) {
     for(size_t i = 0; i < this->dim_inn; ++i){
         NeuronLogistic *inner_i = new NeuronLogistic( ); //f(x) = 1.0 / (1.0 + e^(-x))
         this->solution_inner_neurons->push_back( inner_i );
-        idx = this->solution->add_neuron( inner_i );
+        idx = this->solution->add_neuron( inner_i, BIAS_TYPE::NEXT_BIAS );

         if(i == 0){
             first_inner_neuron = idx;
@@ -112,14 +112,14 @@ DESolver::DESolver( size_t n_equations, size_t n_inputs, size_t m ) {
     size_t weight_idx;
     for(size_t i = 0; i < this->dim_i; ++i){
         for(size_t j = 0; j < this->dim_inn; ++j){
-            weight_idx = this->solution->add_connection_simple(first_input_neuron + i, first_inner_neuron + j);
+            weight_idx = this->solution->add_connection_simple(first_input_neuron + i, first_inner_neuron + j, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT );
             printf("  adding a connection between input neuron %2d[%2d] and inner neuron  %2d[%2d], weight index %3d\n", (int)i, (int)(first_input_neuron + i), (int)j, (int)(first_inner_neuron + j), (int)weight_idx);
         }
     }

     /* connections between inner neurons and output neurons */
     for(size_t i = 0; i < this->dim_inn; ++i){
-        weight_idx = this->solution->add_connection_simple(first_inner_neuron + i, first_output_neuron);
+        weight_idx = this->solution->add_connection_simple(first_inner_neuron + i, first_output_neuron, SIMPLE_CONNECTION_TYPE::NEXT_WEIGHT );
         printf("  adding a connection between inner neuron %2d[%2d] and output neuron %2d[%2d], weight index %3d\n", (int)i, (int)(first_inner_neuron + i), 0, (int)(first_output_neuron ), (int)weight_idx);
     }

@@ -221,7 +221,7 @@ void DESolver::add_to_differential_equation( size_t equation_idx, MultiIndex &al
     size_t idx;
     for( size_t i = 0; i < this->dim_i; ++i ){
         NeuronLinear *input_i = new NeuronLinear( );  //f(x) = x
-        idx = new_net->add_neuron_no_bias( input_i );
+        idx = new_net->add_neuron( input_i, BIAS_TYPE::NO_BIAS );
         input_set[i] = idx;
     }
     new_net->specify_input_neurons( input_set );
@@ -230,7 +230,7 @@ void DESolver::add_to_differential_equation( size_t equation_idx, MultiIndex &al

     /* output neurons */
     std::vector<size_t> output_set( 1 );
-    idx = new_net->add_neuron_no_bias( new NeuronLinear( ) );//f(x) = x
+    idx = new_net->add_neuron( new NeuronLinear( ), BIAS_TYPE::NO_BIAS );//f(x) = x
     output_set[0] = idx;
     new_net->specify_output_neurons( output_set );
     size_t first_output_neuron = idx;
@@ -252,7 +252,7 @@ void DESolver::add_to_differential_equation( size_t equation_idx, MultiIndex &al
             }
         }

-        idx = new_net->add_neuron( n_ptr );
+        idx = new_net->add_neuron( n_ptr, BIAS_TYPE::EXISTING_BIAS, this->solution->get_neuron_bias_index( i + this->dim_i + 1 ) );

         if(i == 0){
             first_inner_neuron = idx;
@@ -262,7 +262,7 @@ void DESolver::add_to_differential_equation( size_t equation_idx, MultiIndex &al
     /* identity neurons serving as a 'glue'*/
     size_t first_glue_neuron = idx + 1;
     for(size_t i = 0; i < derivative_degree * this->dim_inn; ++i){
-        idx = new_net->add_neuron_no_bias( new NeuronLinear( ) ); //f(x) = x
+        idx = new_net->add_neuron( new NeuronLinear( ), BIAS_TYPE::NO_BIAS ); //f(x) = x
     }

     /* connections between input neurons and inner neurons */
@@ -318,6 +318,7 @@ void DESolver::set_error_function(size_t equation_idx, ErrorFunctionType F, Data
     this->errors_functions_data_sets->at( equation_idx ) = conditions;
 }

+//TODO instead use general method with Optimizer as its argument (create hierarchy of optimizers)
 void DESolver::solve_via_particle_swarm(double *domain_bounds, double c1, double c2, double w, size_t n_particles, size_t max_iters, double gamma, double epsilon, double delta) {

diff --git a/src/examples/net_test_ode_1.cpp b/src/examples/net_test_ode_1.cpp
index 18167337f00828a0ba16b1acb6b9ab953bf97103..f458e54eb8323e3fd7f0298f43d74ee43003e0d3 100644
--- a/src/examples/net_test_ode_1.cpp
+++ b/src/examples/net_test_ode_1.cpp
@@ -9,13 +9,9 @@
  * Analytical solution: e^(-2x) * (3x + 1)
  * NN representation: sum over [a_i * (1 + e^(-x * w_i + b_i))^(-1)]
  * -------------------------------------------
- * Optimal NN setting without biases (2 inner neurons)
- * Path 1. w = -6.35706416, b = 0.00000000, a = -1.05305639
- * Path 2. w = -1.55399893, b = 0.00000000, a =  3.07464411
- * -------------------------------------------
  * Optimal NN setting with biases (2 inner neurons)
- * Path 1. w =  6.75296220, b = -1.63419516, a =  1.71242130
- * Path 2. w =  1.86917877, b =  1.09972747, a = -1.70757578
+ * Path 1. w = -1.66009975, b = -0.40767447, a =  2.46457042
+ * Path 2. w = -4.38622765, b =  2.75707816, a = -8.04752347
  * @author Michal Kravčenko
  * @date 17.7.18 -
  */
@@ -243,7 +239,7 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
     std::vector<double> inp, out;

     double frac, alpha, x;
-    double grad_norm = accuracy * 10.0, mem, ai, bi, wi, error, derror, approx, xj, gamma, total_error, sk, sy, sx, beta;
+    double grad_norm = accuracy * 10.0, mem, ai, bi, wi, error, derror, approx, xj, gamma, total_error, sk, sy, sx, sg, beta;
     double grad_norm_prev = grad_norm;
     size_t i, j, iter_idx = 0;

@@ -251,18 +247,20 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
     std::vector<double> data_points(train_size);

     /* ISOTROPIC TRAIN SET */
-//    frac = (d1_e - d1_s) / (train_size - 1);
-//    for(unsigned int i = 0; i < train_size; ++i){
-//        data_points[i] = frac * i;
+    frac = (d1_e - d1_s) / (train_size - 1);
+    for(unsigned int i = 0; i < train_size; ++i){
+        data_points[i] = frac * i;
+    }
+
+//    /* CHEBYSCHEV TRAIN SET */
+//    alpha = PI / (train_size );
+//    frac = 0.5 * (d1_e - d1_s);
+//    for(i = 0; i < train_size; ++i){
+//        x = (std::cos(PI - alpha * i) + 1.0) * frac + d1_s;
+//        data_points[i] = x;
 //    }

-    /* CHEBYSCHEV TRAIN SET */
-    alpha = PI / (train_size );
-    frac = 0.5 * (d1_e - d1_s);
-    for(i = 0; i < train_size; ++i){
-        x = (std::cos(PI - alpha * i) + 1.0) * frac + d1_s;
-        data_points[i] = x;
-    }
+//    DataSet ds(0.0, 4.0, train_size, 0.0);

     std::vector<double> *gradient_current = new std::vector<double>(3 * n_inner_neurons);
     std::vector<double> *gradient_prev = new std::vector<double>(3 * n_inner_neurons);
@@ -334,7 +332,8 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
//            printf("\n");

         for(j = 0; j < data_points.size(); ++j){
-            xj = data_points[i];
+            xj = data_points[j];
+//            xj = ds.get_data()->at(j).first[0];

             /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */
             approx= eval_approx_ddf(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current);
             mem = 0.0 - approx;
@@ -351,60 +350,6 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
             total_error += mem * mem / train_size;
         }

-//        /* error of the unknown function: y(x) = e^(-2x)*(3x+1) => e3 = 1/n * (e^(-2x)*(3x+1) - y(x))^2 */
-//        for(j = 0; j < data_points.size(); ++j) {
-//            xj = data_points[i];
-//            approx= eval_approx_f(xj, n_inner_neurons, *params_current);
-//            mem = (eval_f(xj) - approx);
-//            error = 2.0 * mem / train_size;
-//            for(i = 0; i < n_inner_neurons; ++i){
-//                if(opti_w){
-//                    (*gradient_current)[3 * i] -= error * (eval_approx_dw_f(xj, i, *params_current));
-//                    (*gradient_current)[3 * i + 1] -= error * (eval_approx_da_f(xj, i, *params_current));
-//                }
-//                if(opti_b){
-//                    (*gradient_current)[3 * i + 2] -= error * (eval_approx_db_f(xj, i, *params_current));
-//                }
-//            }
-//            total_error += mem * mem / train_size;
-//        }
-
-//        /* error of the unknown function: y'(x) = e^(-2x)*(1-6x) => e3 = 1/n * (e^(-2x)*(1-6x) - y'(x))^2 */
-//        for(j = 0; j < data_points.size(); ++j) {
-//            xj = data_points[i];
-//            approx= eval_approx_df(xj, n_inner_neurons, *params_current);
-//            mem = (eval_df(xj) - approx);
-//            error = 2.0 * mem / train_size;
-//            for(i = 0; i < n_inner_neurons; ++i){
-//                if(opti_w){
-//                    (*gradient_current)[3 * i] -= error * (eval_approx_dw_df(xj, i, *params_current));
-//                    (*gradient_current)[3 * i + 1] -= error * (eval_approx_da_df(xj, i, *params_current));
-//                }
-//                if(opti_b){
-//                    (*gradient_current)[3 * i + 2] -= error * (eval_approx_db_df(xj, i, *params_current));
-//                }
-//            }
-//            total_error += mem * mem / train_size;
-//        }
-
-//        /* error of the unknown function: y''(x) = 4e^(-2x)*(3x-2) => e3 = 4/n * (e^(-2x)*(3x-2) - y''(x))^2 */
-//        for(j = 0; j < data_points.size(); ++j) {
-//            xj = data_points[i];
-//            approx= eval_approx_ddf(xj, n_inner_neurons, *params_current);
-//            mem = (eval_ddf(xj) - approx);
-//            error = 2.0 * mem / train_size;
-//            for(i = 0; i < n_inner_neurons; ++i){
-//                if(opti_w){
-//                    (*gradient_current)[3 * i] -= error * (eval_approx_dw_ddf(xj, i, *params_current));
-//                    (*gradient_current)[3 * i + 1] -= error * (eval_approx_da_ddf(xj, i, *params_current));
-//                }
-//                if(opti_b){
-//                    (*gradient_current)[3 * i + 2] -= error * (eval_approx_db_ddf(xj, i, *params_current));
-//                }
-//            }
-////            printf("x: %f -> error: %f\n", xj, error);
-//            total_error += mem * mem / train_size;
-//        }

//        for(auto e: *gradient_current){
//            printf("[%10.8f]", e);
@@ -533,7 +478,7 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
//        }

         if(iter_idx % 100 == 0){
-            printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f (%10.8f)\n", (int)iter_idx, gamma, grad_norm, total_error, eval_error_function(*params_prev, n_inner_neurons, data_points));
+            printf("Iteration %12d. Step size: %15.8f, Gradient norm: %15.8f. Total error: %10.8f (%10.8f)\r", (int)iter_idx, gamma, grad_norm, total_error, eval_error_function(*params_prev, n_inner_neurons, data_points));

//            for (i = 0; i < n_inner_neurons; ++i) {
//                wi = (*params_current)[3 * i];
//                ai = (*params_current)[3 * i + 1];
@@ -578,7 +523,8 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
     frac = 1.0 / train_size;

     for(j = 0; j < data_points.size(); ++j){
-        xj = data_points[j];
+//        xj = ds.get_data()->at(j).first[0];
+        xj = data_points[j];

         mem = 4.0 * eval_approx_f(xj, n_inner_neurons, *params_current) + 4.0 * eval_approx_df(xj, n_inner_neurons, *params_current) + eval_approx_ddf(xj, n_inner_neurons, *params_current);

@@ -595,11 +541,11 @@ void test_analytical_gradient_y(std::vector<double> &guess, double accuracy, siz
     delete conjugate_direction_prev;
 }

+void test_odr(double accuracy, size_t n_inner_neurons, size_t train_size, double ds, double de, size_t n_test_points, double ts, double te, size_t max_iters, size_t n_particles){

-void test_odr(size_t n_inner_neurons){
+    /* SOLVER SETUP */
     size_t n_inputs = 1;
     size_t n_equations = 3;
-
     DESolver solver_01( n_equations, n_inputs, n_inner_neurons );

     /* SETUP OF THE EQUATIONS */
@@ -624,11 +570,11 @@ void test_odr(size_t n_inner_neurons){
     /* SETUP OF THE TRAINING DATA */
     std::vector<double> inp, out;

-    double d1_s = 0.0, d1_e = 4.0, frac;
+    double d1_s = ds, d1_e = de, frac;

     /* TRAIN DATA FOR THE GOVERNING DE */
     std::vector<std::pair<std::vector<double>, std::vector<double>>> data_vec_g;
-    unsigned int train_size = 150;
+

     /* ISOTROPIC TRAIN SET */
     frac = (d1_e - d1_s) / (train_size - 1);
@@ -670,7 +616,6 @@ void test_odr(size_t n_inner_neurons){

     /* PARTICLE SWARM TRAINING METHOD SETUP */

-    unsigned int max_iters = 100;

     //must encapsulate each of the partial error functions
     double *domain_bounds = new double[ 6 * n_inner_neurons ];
@@ -679,11 +624,11 @@ void test_odr(size_t n_inner_neurons){
         domain_bounds[2 * i + 1] = 800.0;
     }

-    double c1 = 0.5, c2 = 0.75, w = 0.8;
+    double c1 = 0.05, c2 = 0.0, w = 0.3;
+

-    unsigned int n_particles = 1000;

-    double gamma = 0.5, epsilon = 0.02, delta = 0.9;
+    double gamma = 0.5, epsilon = 0.0000000000002, delta = 1.1;

     solver_01.solve_via_particle_swarm( domain_bounds, c1, c2, w, n_particles, max_iters, gamma, epsilon, delta );

@@ -697,7 +642,6 @@ void test_odr(size_t n_inner_neurons){
         parameters[3 * i + 2] = biases_params->at(i);
     }

-    unsigned int n_test_points = 150;
     std::vector<double> input(1), output(1);
     double x;
     for(unsigned int i = 0; i < n_test_points; ++i){
@@ -716,20 +660,22 @@ void test_odr(size_t n_inner_neurons){

 int main() {

-    unsigned int n_inner_neurons = 6;
+    unsigned int n_inner_neurons = 2;
+    unsigned int train_size = 200;
+    double accuracy = 1e-4;
+    double ds = 0.0;
+    double de = 4.0;

-    test_odr(n_inner_neurons);
+    unsigned int test_size = 300;
+    double ts = ds;
+    double te = de + 2;
+
+    size_t particle_swarm_max_iters = 1000;
+    size_t n_particles = 10;
+    test_odr(accuracy, n_inner_neurons, train_size, ds, de, test_size, ts, te, particle_swarm_max_iters, n_particles);

//    bool optimize_weights = true;
//    bool optimize_biases = true;
-//    unsigned int train_size = 150;
-//    double accuracy = 1e-5;
-//    double ds = 0.0;
-//    double de = 4.0;
-//
-//    unsigned int test_size = 300;
-//    double ts = ds;
-//    double te = de;
//
////    std::vector<double> init_guess = {0.35088209, -0.23738505, 0.14160885, 3.72785473, -6.45758308, 1.73769138};
//    std::vector<double> init_guess(3 * n_inner_neurons);