/** * DESCRIPTION OF THE FILE * * @author Michal KravĨenko * @date 30.7.18 - */ #ifndef INC_4NEURO_GRADIENTDESCENT_H #define INC_4NEURO_GRADIENTDESCENT_H #include "../constants.h" #include "ILearningMethods.h" #include "../ErrorFunction/ErrorFunctions.h" class GradientDescent: public ILearningMethods { private: /** * */ double tolerance; /** * */ size_t restart_frequency; std::vector<double> *optimal_parameters; /** * * @param gamma * @param beta * @param c * @param grad_norm_prev * @param grad_norm * @param fi * @param fim */ virtual void eval_step_size_mk( double &gamma, double beta, double &c, double grad_norm_prev, double grad_norm, double fi, double fim ); double eval_f(double x){ return std::pow(E, -2.0 * x) * (3.0 * x + 1.0); } double eval_df(double x){ return std::pow(E, -2.0 * x) * (1.0 - 6.0 * x); } double eval_ddf(double x){ return 4.0 * std::pow(E, -2.0 * x) * (3.0 * x - 2.0); } double eval_approx_f(double x, size_t n_inner_neurons, std::vector<double> ¶meters){ double value= 0.0, wi, ai, bi, ei, ei1; for(size_t i = 0; i < n_inner_neurons; ++i){ wi = parameters[3 * i]; ai = parameters[3 * i + 1]; bi = parameters[3 * i + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; value += ai / (ei1); } return value; } double eval_approx_df(double x, size_t n_inner_neurons, std::vector<double> ¶meters){ double value= 0.0, wi, ai, bi, ei, ei1; for(size_t i = 0; i < n_inner_neurons; ++i){ wi = parameters[3 * i]; ai = parameters[3 * i + 1]; bi = parameters[3 * i + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; value += ai * wi * ei / (ei1 * ei1); } return value; } double eval_approx_ddf(double x, size_t n_inner_neurons, std::vector<double> ¶meters){ double value= 0.0, wi, ai, bi, ewx, eb; for(size_t i = 0; i < n_inner_neurons; ++i){ wi = parameters[3 * i]; ai = parameters[3 * i + 1]; bi = parameters[3 * i + 2]; eb = std::pow(E, bi); ewx = std::pow(E, wi * x); value += -(ai*wi*wi*eb*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)); } return value; } //NN partial derivative (wi): (ai * x * e^(bi - wi * x)) * (e^(bi - wi * x) + 1)^(-2) double eval_approx_dw_f(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, ai, bi, ei, ei1; wi = parameters[3 * neuron_idx]; ai = parameters[3 * neuron_idx + 1]; bi = parameters[3 * neuron_idx + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; return (ai * x * ei) / (ei1 * ei1); } //dNN partial derivative (wi): -(a w x e^(b - w x))/(e^(b - w x) + 1)^2 + (2 a w x e^(2 b - 2 w x))/(e^(b - w x) + 1)^3 + (a e^(b - w x))/(e^(b - w x) + 1)^2 double eval_approx_dw_df(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, ai, bi, ei, ei1; wi = parameters[3 * neuron_idx]; ai = parameters[3 * neuron_idx + 1]; bi = parameters[3 * neuron_idx + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; return -(ai * wi * x * ei)/(ei1 * ei1) + (2.0*ai*wi*x*ei*ei)/(ei1 * ei1 * ei1) + (ai* ei)/(ei1 * ei1); } //ddNN partial derivative (wi): -(a w^2 x e^(b + 2 w x))/(e^b + e^(w x))^3 - (a w^2 x e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3 + (3 a w^2 x e^(b + 2 w x) (e^(w x) - e^b))/(e^b + e^(w x))^4 - (2 a w e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3 double eval_approx_dw_ddf(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, ai, bi, eb, ewx; wi = parameters[3 * neuron_idx]; ai = parameters[3 * neuron_idx + 1]; bi = parameters[3 * neuron_idx + 2]; eb = std::pow(E, bi); ewx = std::pow(E, wi * x); return -(ai*wi*wi* x * eb*ewx*ewx)/((eb + ewx)*(eb + ewx)*(eb + ewx)) - (ai*wi*wi*x*eb*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)) + (3*ai*wi*wi*x*eb*ewx*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)*(eb + ewx)) - (2*ai*wi*eb*ewx*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)); } //NN partial derivative (ai): (1 + e^(-x * wi + bi))^(-1) double eval_approx_da_f(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, bi, ei, ei1; wi = parameters[3 * neuron_idx]; bi = parameters[3 * neuron_idx + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; return 1.0 / ei1; } //dNN partial derivative (ai): (w e^(b - w x))/(e^(b - w x) + 1)^2 double eval_approx_da_df(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, bi, ei, ei1; wi = parameters[3 * neuron_idx]; bi = parameters[3 * neuron_idx + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; return (wi*ei)/(ei1 * ei1); } //ddNN partial derivative (ai): -(w^2 e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3 double eval_approx_da_ddf(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, bi, eip, ewx, eb; wi = parameters[3 * neuron_idx]; bi = parameters[3 * neuron_idx + 2]; eip = std::pow(E, bi + wi * x); eb = std::pow(E, bi); ewx = std::pow(E, wi * x); return -(wi*wi*eip*(ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)); } //NN partial derivative (bi): -(ai * e^(bi - wi * x)) * (e^(bi - wi * x) + 1)^(-2) double eval_approx_db_f(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, bi, ei, ai, ei1; wi = parameters[3 * neuron_idx]; ai = parameters[3 * neuron_idx + 1]; bi = parameters[3 * neuron_idx + 2]; ei = std::pow(E, bi - wi * x); ei1 = ei + 1.0; return -(ai * ei)/(ei1 * ei1); } //dNN partial derivative (bi): (a w e^(b + w x) (e^(w x) - e^b))/(e^b + e^(w x))^3 double eval_approx_db_df(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, bi, ai, ewx, eb; wi = parameters[3 * neuron_idx]; ai = parameters[3 * neuron_idx + 1]; bi = parameters[3 * neuron_idx + 2]; eb = std::pow(E, bi); ewx = std::pow(E, wi*x); return (ai* wi* eb*ewx* (ewx - eb))/((eb + ewx)*(eb + ewx)*(eb + ewx)); } //ddNN partial derivative (bi): -(a w^2 e^(b + w x) (-4 e^(b + w x) + e^(2 b) + e^(2 w x)))/(e^b + e^(w x))^4 double eval_approx_db_ddf(double x, size_t neuron_idx, std::vector<double> ¶meters){ double wi, bi, ai, ewx, eb; wi = parameters[3 * neuron_idx]; ai = parameters[3 * neuron_idx + 1]; bi = parameters[3 * neuron_idx + 2]; eb = std::pow(E, bi); ewx = std::pow(E, wi*x); return -(ai* wi*wi* eb*ewx* (-4.0* eb*ewx + eb*eb + ewx*ewx))/((eb +ewx)*(eb +ewx)*(eb +ewx)*(eb +ewx)); } double eval_error_function(std::vector<double> ¶meters, size_t n_inner_neurons, std::vector<double> test_points){ double output = 0.0, approx, frac = 1.0 / (test_points.size()); for(auto x: test_points){ /* governing equation */ approx = 4.0 * eval_approx_f(x, n_inner_neurons, parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, parameters) + eval_approx_ddf(x, n_inner_neurons, parameters); output += (0.0 - approx) * (0.0 - approx) * frac; } /* BC */ approx = eval_approx_f(0.0, n_inner_neurons, parameters); output += (1.0 - approx) * (1.0 - approx); approx = eval_approx_df(0.0, n_inner_neurons, parameters); output += (1.0 - approx) * (1.0 - approx); return output; } void eval_step_size_simple(double &gamma, double val, double prev_val, double sk, double grad_norm, double grad_norm_prev){ if(val > prev_val){ gamma *= 0.99999; } if(sk <= 1e-3 || grad_norm < grad_norm_prev){ /* movement on a line */ /* new slope is less steep, speed up */ gamma *= 1.0005; } else if(grad_norm > grad_norm_prev){ /* new slope is more steep, slow down*/ gamma /= 1.0005; } else{ gamma /= 1.005; } // gamma *= 0.999999; } double calculate_gradient( std::vector<std::pair<std::vector<double>, std::vector<double>>> *dataset, size_t n_inner_neurons, std::vector<double> *parameters, std::vector<double> *gradient ){ size_t i, j; double x, mem, derror, total_error, approx; std::vector<double> data_points(dataset->size()); for( i = 0; i < dataset->size(); ++i){ data_points[ i ] = dataset->at( i ).first[0]; } std::vector<double> parameters_analytical(parameters->size()); for(i = 0; i < n_inner_neurons; ++i){ parameters_analytical[3 * i + 0] = parameters->at(0 * n_inner_neurons + i); parameters_analytical[3 * i + 1] = parameters->at(1 * n_inner_neurons + i); parameters_analytical[3 * i + 2] = parameters->at(2 * n_inner_neurons + i); } size_t train_size = data_points.size(); /* error boundary condition: y(0) = 1 => e1 = (1 - y(0))^2 */ x = 0.0; mem = (1.0 - eval_approx_f(x, n_inner_neurons, *parameters)); derror = 2.0 * mem; total_error = mem * mem; for(i = 0; i < n_inner_neurons; ++i){ (*gradient)[i] -= derror * eval_approx_dw_f(x, i, *parameters); (*gradient)[i + n_inner_neurons] -= derror * eval_approx_da_f(x, i, *parameters); (*gradient)[i + 2*n_inner_neurons] -= derror * eval_approx_db_f(x, i, *parameters); } /* error boundary condition: y'(0) = 1 => e2 = (1 - y'(0))^2 */ mem = (1.0 - eval_approx_df(x, n_inner_neurons, *parameters)); derror = 2.0 * mem; total_error += mem * mem; for(i = 0; i < n_inner_neurons; ++i){ (*gradient)[i] -= derror * eval_approx_dw_df(x, i, *parameters); (*gradient)[i + n_inner_neurons] -= derror * eval_approx_da_df(x, i, *parameters); (*gradient)[i + 2*n_inner_neurons] -= derror * eval_approx_db_df(x, i, *parameters); } for(j = 0; j < data_points.size(); ++j){ x = data_points[j]; /* error of the governing equation: y''(x) + 4y'(x) + 4y(x) = 0 => e3 = 1/n * (0 - y''(x) - 4y'(x) - 4y(x))^2 */ approx= eval_approx_ddf(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_df(x, n_inner_neurons, *parameters) + 4.0 * eval_approx_f(x, n_inner_neurons, *parameters); mem = 0.0 - approx; derror = 2.0 * mem / train_size; for(i = 0; i < n_inner_neurons; ++i){ (*gradient)[i] -= derror * (eval_approx_dw_ddf(x, i, *parameters) + 4.0 * eval_approx_dw_df(x, i, *parameters) + 4.0 * eval_approx_dw_f(x, i, *parameters)); (*gradient)[i + n_inner_neurons] -= derror * (eval_approx_da_ddf(x, i, *parameters) + 4.0 * eval_approx_da_df(x, i, *parameters) + 4.0 * eval_approx_da_f(x, i, *parameters)); (*gradient)[i + 2*n_inner_neurons] -= derror * (eval_approx_db_ddf(x, i, *parameters) + 4.0 * eval_approx_db_df(x, i, *parameters) + 4.0 * eval_approx_db_f(x, i, *parameters)); } total_error += mem * mem / train_size; } return total_error; } public: /** * * @param epsilon */ GradientDescent( double epsilon = 1e-3, size_t n_to_restart = 100 ); /** * */ ~GradientDescent(); /** * * @param ef */ virtual void optimize( ErrorFunction &ef ); /** * * @return */ virtual std::vector<double>* get_parameters( ); }; #endif //INC_4NEURO_GRADIENTDESCENT_H