diff --git a/src/ErrorFunction/ErrorFunctions.cpp b/src/ErrorFunction/ErrorFunctions.cpp
index 39cb60294a3f9959490703673f423887c93960a2..72490ffc00e97d5fde1baa6e5a61082d99e4722a 100644
--- a/src/ErrorFunction/ErrorFunctions.cpp
+++ b/src/ErrorFunction/ErrorFunctions.cpp
@@ -108,7 +108,7 @@ namespace lib4neuro {
     double MSE::eval_on_single_input(std::vector<double>* input,
                                      std::vector<double>* output,
                                      std::vector<double>* weights) {
-        std::vector<double> predicted_output;
+        std::vector<double> predicted_output(this->get_network_instance()->get_n_outputs());
         this->net->eval_single(*input, predicted_output, weights);
         double result = 0;
         double val;
@@ -367,11 +367,21 @@ namespace lib4neuro {
         }
     }
 
+    double MSE::calculate_single_residual(std::vector<double>* input, std::vector<double>* output) {
+        //TODO maybe move to the general ErrorFunction
+        //TODO check input vector sizes - they HAVE TO be allocated before calling this function
+
+        return -this->eval_on_single_input(input, output);
+    }
+
     void MSE::calculate_residual_gradient(std::vector<double>* input,
                                           std::vector<double>* output,
                                           std::vector<double>* gradient,
                                           double h) {
+
+        //TODO check input vector sizes - they HAVE TO be allocated before calling this function
+
         size_t n_parameters = this->get_dimension();
         std::vector<double>* parameters = this->get_parameters();
@@ -387,11 +397,11 @@ namespace lib4neuro {
             if(delta != 0) {
                 /* Computation of f_val1 = f(x + delta) */
                 parameters->at(i) = former_parameter_value + delta;
-                f_val1 = -1 * this->eval_on_single_input(input, output);
+                f_val1 = -1 * this->eval_on_single_input(input, output, parameters);
 
                 /* Computation of f_val2 = f(x - delta) */
                 parameters->at(i) = former_parameter_value - delta;
-                f_val2 = -1 * this->eval_on_single_input(input, output);
+                f_val2 = -1 * this->eval_on_single_input(input, output, parameters);
 
                 gradient->at(i) = (f_val1 - f_val2) / (2*delta);
             }
@@ -550,6 +560,4 @@ namespace lib4neuro {
     DataSet* ErrorSum::get_dataset() {
         return this->summand->at(0)->get_dataset();
     };
-
-
 }
diff --git a/src/ErrorFunction/ErrorFunctions.h b/src/ErrorFunction/ErrorFunctions.h
index c155f3ce2e3e49a25470c19719aa42bf002cfd92..2549c9f1988dd1ab0cdbd63b303bc1f5572353f7 100644
--- a/src/ErrorFunction/ErrorFunctions.h
+++ b/src/ErrorFunction/ErrorFunctions.h
@@ -180,8 +180,10 @@ class ErrorFunctionDifferentiable : public ErrorFunction {
     calculate_residual_gradient(std::vector<double>* input,
                                 std::vector<double>* output,
                                 std::vector<double>* gradient,
-                                double h) = 0;
+                                double h = 1e-3) = 0;
 
+    virtual double calculate_single_residual(std::vector<double>* input,
+                                             std::vector<double>* output) = 0;
 };
 
 class MSE : public ErrorFunctionDifferentiable {
@@ -217,7 +219,18 @@ class ErrorFunctionDifferentiable : public ErrorFunction {
                          size_t batch = 0) override;
 
     /**
-     * Compute gradient of the residual function 0 - MSE(x_i) for a specific input x_i.
+     * Evaluates the residual function f(x) = 0 - MSE(x) for a
+     * specified input x
+     *
+     * @param[in] input Vector being a single input
+     * @param[in] output Vector being the expected output for the given input
+     * @return Value of the residual, i.e. the negative MSE of the single sample
+     */
+    virtual double calculate_single_residual(std::vector<double>* input,
+                                             std::vector<double>* output) override;
+
+    /**
+     * Compute gradient of the residual function f(x) = 0 - MSE(x) for a specific input x.
      * The method uses the central difference method.
     *
     * @param[in] input Vector being a single input
diff --git a/src/LearningMethods/LevenbergMarquardt.cpp b/src/LearningMethods/LevenbergMarquardt.cpp
index 9be7a149658c990938f3c95df90f4c0018da1917..478fc1cd6b62864c0dbb3753f315e920749e357b 100644
--- a/src/LearningMethods/LevenbergMarquardt.cpp
+++ b/src/LearningMethods/LevenbergMarquardt.cpp
@@ -31,8 +31,6 @@ struct lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl {
      */
     std::vector<double> *optimal_parameters;
 
-//    std::unique_ptr<lib4neuro::LevenbergMarquardt> parent_class;
-
     /**
     * Returns the Jacobian matrix of the residual function, computed via the central difference method, i.e.
     * f'(x) = (f(x + delta) - f(x - delta)) / (2*delta)
     *
     * @param h Step size
     * @return Jacobian matrix
     */
-    arma::Mat<double>* get_Jacobian_matrix(lib4neuro::ErrorFunctionDifferentiable& ef, arma::Mat<double>* J, double h=1e-3);
+    arma::Mat<double>* get_Jacobian_matrix(lib4neuro::ErrorFunctionDifferentiable& ef,
+                                           arma::Mat<double>* J,
+                                           double h=1e-3);
 };
 
-arma::Mat<double>* lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_Jacobian_matrix(lib4neuro::ErrorFunctionDifferentiable& ef,
+arma::Mat<double>* lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_Jacobian_matrix(
+        lib4neuro::ErrorFunctionDifferentiable& ef,
         arma::Mat<double>* J,
         double h) {
+
     size_t n_parameters = ef.get_dimension();
     size_t n_data_points = ef.get_dataset()->get_n_elements();
-    size_t input_dim = ef.get_network_instance()->get_n_inputs();
-    size_t output_dim = ef.get_network_instance()->get_n_outputs();
+//    size_t input_dim = ef.get_network_instance()->get_n_inputs();
+//    size_t output_dim = ef.get_network_instance()->get_n_outputs();
 
     std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = ef.get_dataset()->get_data();
-    std::vector<double>* parameters = ef.get_parameters();
+//    std::vector<double>* parameters = ef.get_parameters();
+    std::vector<double> grad(n_parameters);
 
     for(size_t i = 0; i < n_data_points; i++) {
-        std::vector<double> grad(n_parameters);
+        ef.calculate_residual_gradient(&data->at(i).first, &data->at(i).second, &grad, h);
 
         for(size_t j = 0; j < n_parameters; j++) {
-            // J->at(i, j) = ef.calculate_residual_gradient(data->at(i).first, data->at(i).second, grad);
+            J->at(i, j) = grad.at(j);
         }
     }
@@ -74,7 +77,6 @@ namespace lib4neuro {
                                            double lambda_increase,
                                            double lambda_decrease) : p_impl(new LevenbergMarquardtImpl()) {
 
-//        this->p_impl->parent_class = this;
         this->p_impl->tolerance = tolerance;
         this->p_impl->tolerance_gradient = tolerance_gradient;
         this->p_impl->tolerance_parameters = tolerance_parameters;
@@ -87,13 +89,13 @@ namespace lib4neuro {
 
     void LevenbergMarquardt::optimize(lib4neuro::ErrorFunctionDifferentiable& ef,
                                       std::ofstream* ofs) {
-        optimize(ef, ofs, LM_UPDATE_TYPE::MARQUARDT);
-
+        optimize(ef, LM_UPDATE_TYPE::MARQUARDT, ofs);
     }
 
     void LevenbergMarquardt::optimize(lib4neuro::ErrorFunctionDifferentiable& ef,
-                                      std::ofstream* ofs,
-                                      lib4neuro::LM_UPDATE_TYPE update_type) {
+                                      lib4neuro::LM_UPDATE_TYPE update_type,
+                                      std::ofstream* ofs) {
+
         /* Copy data set max and min values, if it's normalized */
         if(ef.get_dataset()->is_normalized()) {
             ef.get_network_instance()->set_normalization_strategy_instance(
@@ -105,34 +107,28 @@ namespace lib4neuro {
             *ofs << "Finding a solution via a Levenberg-Marquardt method with adaptive step-length..."
                 << std::endl;
         }
 
-        double grad_norm = this->p_impl->tolerance * 10.0;
-        double grad_norm_prev;
-        size_t i;
-        long long int iter_idx = this->p_impl->maximum_niters;
-        double gamma = 1.0;
-        double prev_val;
-        double val = 0.0;
-        double c = 1.25;
-
         size_t n_parameters = ef.get_dimension();
         size_t n_data_points = ef.get_dataset()->get_n_elements();
-        size_t output_dim = ef.get_dataset()->get_output_dim();
+
         std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
         std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
         std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
         std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
         std::vector<double> *params_current = ef.get_parameters();
-        std::vector<double> *params_prev = new std::vector<double>(n_parameters);
-        std::vector<double> *ptr_mem;
+        std::vector<double> *params_tmp = new std::vector<double>(n_parameters);
 
         arma::Mat<double> J(n_data_points, n_parameters);  // Jacobian matrix
         arma::Mat<double> H(n_parameters, n_parameters);   // Approximated Hessian (J'*J), n_parameters x n_parameters
+        arma::Mat<double> H_new(n_parameters, n_parameters);
+
+        std::vector<std::pair<std::vector<double>, std::vector<double>>>* data = ef.get_dataset()->get_data();
 
         double lambda = this->p_impl->lambda_initial;  // Damping parameter
+        double prev_err;
         double current_err;
 
         bool update_J = true;
         arma::Col<double> update;
-        arma::Mat<double> d(n_data_points, output_dim);
+        std::vector<double> d_prep(n_data_points);
+        arma::Col<double> d;
 
         //-------------------//
         // Solver iterations //
         //-------------------//
@@ -141,34 +137,74 @@ namespace lib4neuro {
         do {
             if(update_J) {
                 /* Get Jacobian matrix */
-                this->p_impl->get_Jacobian_matrix(ef,
-                                                  &J);
+                this->p_impl->get_Jacobian_matrix(ef, &J);
 
                 /* Get approximation of Hessian (H ~ J'*J) */
                 H = J.t() * J;
 
-                /* Evaluate the current error */
-                current_err = ef.eval();
+                /* Evaluate the error before updating parameters */
+                prev_err = ef.eval();
             }
 
-            /* H = H + lambda*I */
-            H += lambda * arma::eye<arma::Mat<double>>(n_parameters, n_parameters);
+            /* H_new = H + lambda*I */
+            H_new = H + lambda * arma::eye<arma::Mat<double>>(n_parameters, n_parameters);
 
             /* Compute the residual vector */
-//            for(size_t j = 0; j < n_data_points; j++) {
-//                d(j) =
-//            }
+            for(size_t i = 0; i < n_data_points; i++) {
+                d_prep.at(i) = ef.calculate_single_residual(&data->at(i).first, &data->at(i).second);
+            }
+            d = arma::Col<double>(d_prep);
 
-            /* Compute the update vector */
-            update = -H.i()*(J.t()*d);
+            /* Compute the update vector by solving (H + lambda*I)*update = -J'*d,
+             * which avoids forming the inverse of H_new explicitly */
+            update = arma::solve(H_new, -J.t() * d);
+
+            /* Evaluate the error at the trial point x + update */
+            for(size_t i = 0; i < n_parameters; i++) {
+                params_tmp->at(i) = params_current->at(i) + update.at(i);
+            }
+            current_err = ef.eval(params_tmp);
+
+            /* Check, if the parameter update improved the function */
+            if(current_err < prev_err) {
+
+                /* Accept the new parameters */
+                for(size_t i = 0; i < n_parameters; i++) {
+                    params_current->at(i) = params_tmp->at(i);
+                }
+
+                /* If the convergence threshold is achieved, finish the computation */
+                if(current_err < this->p_impl->tolerance) {
+                    break;
+                }
+
+                /* The error was lowered by the parameter update, so decrease the damping
+                 * factor lambda as well */
+
+                //TODO rewrite without division!
+                lambda /= this->p_impl->lambda_decrease;
+
+                prev_err = current_err;
+                update_J = true;
+
+            } else {
+                /* If the error after the parameter update is not lower, increase the damping factor lambda */
+                update_J = false;
+                lambda *= this->p_impl->lambda_increase;
+            }
+
+            COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << std::endl);
+
+        }while(iter_counter++ < this->p_impl->maximum_niters);
 
-        }while(iter_counter < this->p_impl->maximum_niters);
+        /* Store the optimized parameters */
+        *this->p_impl->optimal_parameters = *params_current;
+
+        delete params_tmp;
     }
 
     std::vector<double>* LevenbergMarquardt::get_parameters() {
         return this->p_impl->optimal_parameters;
     }
 
-    LevenbergMarquardt::~LevenbergMarquardt() {};
+    LevenbergMarquardt::~LevenbergMarquardt() = default;
 }
\ No newline at end of file
diff --git a/src/LearningMethods/LevenbergMarquardt.h b/src/LearningMethods/LevenbergMarquardt.h
index ae00e496de184d7ef078ea4501a68a3bcde0083d..5a898edaca287988dd07fa060aadbfea75712385 100644
--- a/src/LearningMethods/LevenbergMarquardt.h
+++ b/src/LearningMethods/LevenbergMarquardt.h
@@ -40,9 +40,11 @@ namespace lib4neuro {
                            double lambda_increase=11,
                            double lambda_decrease=9);
 
-        void optimize(lib4neuro::ErrorFunctionDifferentiable &ef, std::ofstream* ofs = nullptr);
+        void optimize(ErrorFunctionDifferentiable &ef, std::ofstream* ofs = nullptr);
 
-        void optimize(lib4neuro::ErrorFunctionDifferentiable &ef, std::ofstream* ofs, LM_UPDATE_TYPE update_type);
+        void optimize(ErrorFunctionDifferentiable &ef,
+                      LM_UPDATE_TYPE update_type,
+                      std::ofstream* ofs = nullptr);
 
         std::vector<double>* get_parameters() override;
 
diff --git a/src/LearningMethods/ParticleSwarm.cpp b/src/LearningMethods/ParticleSwarm.cpp
index 2574935ea4b1e2849abdf3966bdc95bebc0b66a3..997122c0a6cddfd89c8f35814df272a2184710d1 100644
--- a/src/LearningMethods/ParticleSwarm.cpp
+++ b/src/LearningMethods/ParticleSwarm.cpp
@@ -412,15 +412,9 @@ namespace lib4neuro {
             printf("\nMax number of iterations reached (%d)! Objective function value: %10.8f\n", (int) outer_it, optimal_value);
         }
 
-//        for (size_t i = 0; i <= this->func_dim - 1; ++i) {
-//            printf("%10.8f \n", (*this->p_min_glob)[i]);
-//        }
-
-//        ef.eval( this->get_parameters() );
-//        ef.eval( this->get_parameters() );
 
         delete centroid;
     }
diff --git a/src/Network/NeuralNetwork.cpp b/src/Network/NeuralNetwork.cpp
index ed0f9ad5bb3931476dd75e550469d4579962217d..a7c471907f673b23f3a0668f180aff72f21c0994 100644
--- a/src/Network/NeuralNetwork.cpp
+++ b/src/Network/NeuralNetwork.cpp
@@ -472,8 +472,10 @@ namespace lib4neuro {
         this->delete_weights = false;
     }
 
-    void NeuralNetwork::eval_single(::std::vector<double> &input, ::std::vector<double> &output,
-                                    ::std::vector<double> *custom_weights_and_biases) {
+    void NeuralNetwork::eval_single(::std::vector<double>& input,
+                                    ::std::vector<double>& output,
+                                    ::std::vector<double>* custom_weights_and_biases) {
+
         if ((this->input_neuron_indices->size() * this->output_neuron_indices->size()) <= 0) {
             THROW_INVALID_ARGUMENT_ERROR("Input and output neurons have not been specified!");
         }
diff --git a/src/examples/simulator.cpp b/src/examples/simulator.cpp
index f527ebf2d26199fd85e4bbc6d2a73af4b90a8880..457c6b1b15d80d74b01819f4da1dcac5ec0aeb08 100644
--- a/src/examples/simulator.cpp
+++ b/src/examples/simulator.cpp
@@ -12,19 +12,20 @@
 #include <assert.h>
 
 #include "4neuro.h"
+#include "../LearningMethods/LevenbergMarquardt.h"
 
 int main(int argc, char** argv) {
     try {
         /* PHASE 1 - TRAINING DATA LOADING, NETWORK ASSEMBLY AND PARTICLE SWARM OPTIMIZATION */
-        l4n::CSVReader reader1("/home/martin/Desktop/data_BACK_RH_1.csv", ";", true);  // File, separator, skip 1st line
+        l4n::CSVReader reader1("/home/martin/Desktop/FH1Z1.CV3.PV.dat", "\t", true);  // File, separator, skip 1st line
         reader1.read();  // Read from the file
 
         /* PHASE 1 - NEURAL NETWORK SPECIFICATION */
         /* Create data set for both the first training of the neural network */
 
         /* Specify which columns are inputs or outputs */
         std::vector<unsigned int> inputs1 = { 0 };  // Possible multiple inputs, e.g. {0,3}, column indices starting from 0
-        std::vector<unsigned int> outputs1 = { 1 };  // Possible multiple outputs, e.g. {1,2}
+        std::vector<unsigned int> outputs1 = { 2 };  // Possible multiple outputs, e.g. {1,2}
         l4n::DataSet ds1 = reader1.get_data_set(&inputs1, &outputs1);  // Creation of data-set for NN
         ds1.normalize();  // Normalization of data to prevent numerical problems
@@ -72,8 +73,8 @@ int main(int argc, char** argv) {
                          1.711897,
                          1.711897,
                          0.711897,
-                         250,
-                         20);
+                         300,
+                         1);
 
         /* Weight and bias randomization in the network accordingly to the uniform distribution */
         nn1.randomize_parameters();
@@ -87,13 +89,13 @@ int main(int argc, char** argv) {
         l4n::NeuralNetwork nn2("test_net_Particle_Swarm.4n");
 
         /* Training data loading for the second phase */
-        l4n::CSVReader reader2("/home/martin/Desktop/data_BACK_RH_1.csv", ";", true);  // File, separator, skip 1st line
+        l4n::CSVReader reader2("/home/martin/Desktop/FH1Z1.CV3.PV.dat", "\t", true);  // File, separator, skip 1st line
         reader2.read();  // Read from the file
 
         /* Create data set for both the first training of the neural network */
         /* Specify which columns are inputs or outputs */
         std::vector<unsigned int> inputs2 = { 0 };  // Possible multiple inputs, e.g. {0,3}, column indices starting from 0
-        std::vector<unsigned int> outputs2 = { 1 };  // Possible multiple outputs, e.g. {1,2}
+        std::vector<unsigned int> outputs2 = { 2 };  // Possible multiple outputs, e.g. {1,2}
         l4n::DataSet ds2 = reader2.get_data_set(&inputs2, &outputs2);  // Creation of data-set for NN
         ds2.normalize();  // Normalization of data to prevent numerical problems
@@ -104,7 +105,7 @@ int main(int argc, char** argv) {
         // 1) Threshold for the successful ending of the optimization - deviation from minima
         // 2) Number of iterations to reset step size to tolerance/10.0
         // 3) Maximal number of iterations - optimization will stop after that, even if not converged
-        l4n::GradientDescent gs(1e-6, 150, 2000);
+        l4n::GradientDescent gs(1e-6, 150, 1);
 
         /* Gradient Descent Optimization */
         gs.optimize(mse2);  // Network training
@@ -131,13 +132,13 @@ int main(int argc, char** argv) {
         nn3.write_biases(&output_file);
 
         /* Evaluate network on an arbitrary data-set and save results into the file */
-        l4n::CSVReader reader3("/home/martin/Desktop/data_BACK_RH_1.csv", ";", true);  // File, separator, skip 1st line
+        l4n::CSVReader reader3("/home/martin/Desktop/FH1Z1.CV3.PV.dat", "\t", true);  // File, separator, skip 1st line
         reader3.read();  // Read from the file
 
         /* Create data set for both the testing of the neural network */
         /* Specify which columns are inputs or outputs */
         std::vector<unsigned int> inputs3 = { 0 };  // Possible multiple inputs, e.g. {0,3}, column indices starting from 0
-        std::vector<unsigned int> outputs3 = { 1 };  // Possible multiple outputs, e.g. {1,2}
+        std::vector<unsigned int> outputs3 = { 2 };  // Possible multiple outputs, e.g. {1,2}
         l4n::DataSet ds3 = reader3.get_data_set(&inputs3, &outputs3);  // Creation of data-set for NN
         ds3.normalize();  // Normalization of data to prevent numerical problems
@@ -150,10 +151,13 @@ int main(int argc, char** argv) {
 
         /* Error function */
         l4n::MSE mse3(&nn3, &ds3);  // First parameter - neural network, second parameter - data-set
 
+        l4n::LevenbergMarquardt lm(200);
+        lm.optimize(mse3);
+
         std::cout << "Network trained only by GPS: " << std::endl;
         mse1.eval_on_data_set(&ds3, &output_file, nullptr, true, true);
 
-        std::cout << "Network trained only by GS:" << std::endl;
+        std::cout << "Network trained by GPS, GS and LM:" << std::endl;
         mse3.eval_on_data_set(&ds3, &output_file, nullptr, true, true);
 
         /* Close the output file for writing */
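
Note on the update rule implemented in LevenbergMarquardt::optimize above: each iteration solves the damped normal equations (J'J + lambda*I) * update = -J' * d for the step, evaluates the error at the trial point, and then either accepts the step and shrinks lambda or rejects it and grows lambda (the constructor defaults are lambda_increase=11, lambda_decrease=9). The standalone snippet below illustrates the same accept/reject loop on a toy linear least-squares fit. It is a minimal sketch assuming only Armadillo; the model y = a*x + b and all names in it are hypothetical and not part of lib4neuro.

#include <armadillo>
#include <iostream>

int main() {
    /* Synthetic data: y = 2x + 1 with a little noise */
    arma::Col<double> x = arma::linspace<arma::Col<double>>(0.0, 1.0, 20);
    arma::Col<double> y = 2.0 * x + 1.0 + 0.01 * arma::randn<arma::Col<double>>(20);

    arma::Col<double> params = {0.0, 0.0};   // [a, b]
    double lambda = 1e-2;                    // damping factor

    for (size_t iter = 0; iter < 100; iter++) {
        /* Residuals r_i = y_i - (a*x_i + b) and their Jacobian w.r.t. [a, b] */
        arma::Col<double> r = y - (params(0) * x + params(1));
        arma::Mat<double> J(x.n_elem, 2);
        J.col(0) = -x;          // dr/da
        J.col(1).fill(-1.0);    // dr/db

        /* Damped normal equations: (J'J + lambda*I) * delta = -J' * r */
        arma::Mat<double> H = J.t() * J
                              + lambda * arma::eye<arma::Mat<double>>(2, 2);
        arma::Col<double> delta = arma::solve(H, -J.t() * r);

        /* Trial step: accept and shrink lambda if the error improves,
         * otherwise reject and grow lambda */
        arma::Col<double> trial = params + delta;
        arma::Col<double> r_trial = y - (trial(0) * x + trial(1));
        if (arma::dot(r_trial, r_trial) < arma::dot(r, r)) {
            params = trial;
            lambda /= 9.0;      // cf. lambda_decrease
        } else {
            lambda *= 11.0;     // cf. lambda_increase
        }

        if (arma::norm(delta) < 1e-10) break;  // step small enough -> done
    }

    std::cout << "a = " << params(0) << ", b = " << params(1) << std::endl;
    return 0;
}

Using arma::solve instead of an explicit inverse mirrors the change made in the patch and stays stable when J'J is close to singular, which is exactly the regime where the lambda*I damping matters.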