From d6969095ed6996629b2c69fe90c4766a789ffc31 Mon Sep 17 00:00:00 2001
From: Michal Kravcenko <michal.kravcenko@vsb.cz>
Date: Wed, 20 Feb 2019 14:00:33 +0100
Subject: [PATCH] ADD+MOD: add two new learning methods and a new learning
 method able to cycle through a user-defined sequence of learning methods

---
 src/LearningMethods/GradientDescent.cpp       | 129 +++++++++-
 src/LearningMethods/GradientDescent.h         |  18 ++
 src/LearningMethods/GradientDescentBB.cpp     | 229 ++++++++++++++++++
 src/LearningMethods/GradientDescentBB.h       |  91 +++++++
 .../GradientDescentSingleItem.cpp             | 106 ++++++++
 .../GradientDescentSingleItem.h               | 106 ++++++++
 src/LearningMethods/ILearningMethods.cpp      |   3 +-
 src/LearningMethods/ILearningMethods.h        |   4 +-
 src/LearningMethods/LearningSequence.cpp      |  61 +++++
 src/LearningMethods/LearningSequence.h        |  80 ++++++
 src/LearningMethods/ParticleSwarm.cpp         |   8 +-
 11 files changed, 817 insertions(+), 18 deletions(-)
 create mode 100644 src/LearningMethods/GradientDescentBB.cpp
 create mode 100644 src/LearningMethods/GradientDescentBB.h
 create mode 100644 src/LearningMethods/GradientDescentSingleItem.cpp
 create mode 100644 src/LearningMethods/GradientDescentSingleItem.h
 create mode 100644 src/LearningMethods/LearningSequence.cpp
 create mode 100644 src/LearningMethods/LearningSequence.h

diff --git a/src/LearningMethods/GradientDescent.cpp b/src/LearningMethods/GradientDescent.cpp
index ee8cec51..44412b60 100644
--- a/src/LearningMethods/GradientDescent.cpp
+++ b/src/LearningMethods/GradientDescent.cpp
@@ -5,6 +5,7 @@
  * @date 30.7.18 - 
  */
 
+#include <random.hpp>
 #include "GradientDescent.h"
 #include "message.h"
 
@@ -42,6 +43,46 @@ namespace lib4neuro {
 
     }
 
+    bool GradientDescent::perform_feasible_1D_step(
+            lib4neuro::ErrorFunction &ef,
+            double error_previous,
+            double step_coefficient,
+            std::vector<double> *direction,
+            std::vector<double> *parameters_before,
+            std::vector<double> *parameters_after
+            ) {
+
+        size_t i;
+
+        boost::random::mt19937 gen(std::time(nullptr));
+        boost::random::uniform_int_distribution<> dis(0, direction->size() - 1); /* bounds are inclusive */
+        size_t max_dir_idx = dis(gen);
+
+        double error_current = error_previous + 1.0;
+        while( error_current >=  error_previous ){
+            (*parameters_after)[max_dir_idx] = (*parameters_before)[max_dir_idx] - step_coefficient * (*direction)[max_dir_idx];
+
+            error_current = ef.eval( parameters_after );
+            if( step_coefficient < 1e-32){
+//                COUT_DEBUG("    Attempting to find a feasible direction in one dimension was NOT SUCCESSFUL" << std::endl);
+                for (i = 0; i < direction->size(); ++i) {
+                    (*parameters_after)[i] = (*parameters_before)[i] - step_coefficient * (*direction)[i];
+                }
+                return false;
+            }
+            else{
+                if( error_current >=  error_previous ){
+//                    COUT_DEBUG("    Incorrect step size! Reducing it by a factor of 0.5. Errors: " << error_current << ", prev: " << error_previous << std::endl);
+                    step_coefficient *= 0.5;
+                }
+                else{
+//                    COUT_DEBUG("    Step OK" << std::endl);
+                }
+            }
+        }
+        return true;
+    }
+
     void GradientDescent::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {
 
         /* Copy data set max and min values, if it's normalized */
@@ -51,9 +92,11 @@ namespace lib4neuro {
         }
 
         COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
+        COUT_INFO("Initial error: " << ef.eval() << std::endl);
 
         if(ofs && ofs->is_open()) {
             *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
+            *ofs << "Initial error: " << ef.eval() << std::endl;
         }
 
         double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
@@ -80,6 +123,12 @@ namespace lib4neuro {
 
         std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
         std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
+
+        val = ef.eval(params_current);
+        double coeff = 1;
+        bool it_analyzed = false;
+        size_t counter_good_guesses = 0, counter_bad_guesses = 0, counter_simplified_direction_good = 0, counter_simplified_direction_bad = 0;
+        double cooling = 1.0;
         while (grad_norm > this->tolerance && (iter_idx != 0)) {
             iter_idx--;
             iter_counter++;
@@ -105,10 +154,12 @@ namespace lib4neuro {
 
             /* Update of the parameters */
             /* step length calculation */
-            if (iter_counter < 10 || iter_counter % this->restart_frequency == 0) {
+            if (iter_counter < 10 || iter_counter % this->restart_frequency == 0) {
                 /* fixed step length */
                 gamma = 0.1 * this->tolerance;
-                //gamma = 0.001;
+//                gamma = 1 / grad_norm;
+//                gamma = 1e-4;
+                cooling = 1.0;
             } else {
                 /* angle between two consecutive gradients */
                 sx = 0.0;
@@ -128,8 +179,54 @@ namespace lib4neuro {
             }
 
             for (i = 0; i < gradient_current->size(); ++i) {
-                (*params_prev)[i] = (*params_current)[i] - gamma * (*gradient_current)[i];
+                (*params_prev)[i] = (*params_current)[i] - cooling * gamma * (*gradient_current)[i];
             }
+            val = ef.eval(params_prev);
+
+//            val = prev_val + 1.0;
+//            coeff = 1;
+//            it_analyzed = false;
+//            while(val >= prev_val){
+//                for (i = 0; i < gradient_current->size(); ++i) {
+//                    (*params_prev)[i] = (*params_current)[i] - coeff * gamma * (*gradient_current)[i];
+//                }
+//                val = ef.eval(params_prev);
+//
+//
+//                if( coeff < 1e-32){
+////                    COUT_DEBUG("Error, the advised gradient direction is not feasible. Attempting to find a feasible direction in one dimension" << std::endl);
+//                    if( !this->perform_feasible_1D_step(ef, prev_val, gamma, gradient_current, params_current, params_prev) ){
+//                        gamma = 1;
+//                        counter_simplified_direction_bad++;
+//                    }
+//                    else{
+//                        gamma = 1;
+//                        counter_simplified_direction_good++;
+//                    }
+//
+//                    break;
+//                }
+//                else{
+//                    if( val >=  prev_val ){
+////                        COUT_DEBUG("Incorrect step size! Reducing gamma. Errors: " << val << ", prev: " << prev_val << std::endl);
+//                        coeff *= 0.5;
+//
+//                        if( !it_analyzed ){
+//                            counter_bad_guesses++;
+//                        }
+//                    }
+//                    else{
+////                        COUT_DEBUG("Step OK" << std::endl);
+//                        if( !it_analyzed ){
+//                            counter_good_guesses++;
+//                        }
+//                    }
+//                }
+//                it_analyzed = true;
+//            }
+//            gamma *= coeff;
+
+
 
             /* switcheroo */
             ptr_mem = gradient_prev;
@@ -140,22 +237,31 @@ namespace lib4neuro {
             params_prev = params_current;
             params_current = ptr_mem;
 
-            val = ef.eval(params_current);
 
             COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
-                                                  << ". Step size: " << gamma
+                                                  << ". Step size: " << gamma * cooling
                                                   << ". C: " << c
                                                   << ". Gradient norm: " << grad_norm
                                                   << ". Total error: " << val
                                                   << ".\r" );
 
             WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter)
-                                                  << ". Step size: " << gamma
+                                                  << ". Step size: " << gamma * cooling
                                                   << ". C: " << c
                                                   << ". Gradient norm: " << grad_norm
                                                   << ". Total error: " << val
                                                   << "." << std::endl);
 
+//            if(iter_counter % 100 == 0){
+//                COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter)
+//                                                      << ". Step size: " << gamma
+//                                                      << ". C: " << c
+//                                                      << ". Gradient norm: " << grad_norm
+//                                                      << ". Total error: " << val
+//                                                      << ".\r");
+//            }
+
+            cooling *= 0.9999;
 
         }
         COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
@@ -164,19 +270,20 @@ namespace lib4neuro {
                                               << ". Gradient norm: " << grad_norm
                                               << ". Total error: " << val
                                               << "." << std::endl);
+        COUT_DEBUG("Number of total steps: " << counter_bad_guesses + counter_good_guesses << ", good: " << counter_good_guesses << ", bad: " << counter_bad_guesses << ", from which " << counter_simplified_direction_good + counter_simplified_direction_bad << " were attempted by simplified direction, success: " << counter_simplified_direction_good << ", fail: " << counter_simplified_direction_bad << std::endl << std::endl );
 
         if(iter_idx == 0) {
-            COUT_INFO("Maximum number of iterations (" << this->maximum_niters << ") was reached!" << std::endl);
+            COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl);
 
             if(ofs && ofs->is_open()) {
-                *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached!" << std::endl;
+                *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val << std::endl;
 
             }
 
         } else {
-            COUT_INFO("Gradient Descent method converged after "
-                              << this->maximum_niters-iter_idx
-                              << " iterations."
+            COUT_INFO(std::endl << "Gradient Descent method converged after "
+                              << this->maximum_niters - iter_idx
+                              << " iterations. Final error: " << val
                               << std::endl);
 #ifdef L4N_DEBUG
             if(ofs && ofs->is_open()) {
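
Note on the step length in the GradientDescent hunks above: the new cooling factor damps the
adaptive step over time and is reset together with the fixed-step restart. A minimal sketch of
the effective update introduced by this patch (gamma_k denotes the step length chosen by the
unchanged adaptive branch):

    w_{k+1} = w_k - c_k \, \gamma_k \, \nabla E(w_k), \qquad c_0 = 1, \quad c_{k+1} = 0.9999 \, c_k,

with c_k reset to 1 whenever the fixed-step branch is taken (the first 10 iterations and every
restart_frequency-th iteration thereafter).
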
diff --git a/src/LearningMethods/GradientDescent.h b/src/LearningMethods/GradientDescent.h
index 7936937f..bd75a317 100644
--- a/src/LearningMethods/GradientDescent.h
+++ b/src/LearningMethods/GradientDescent.h
@@ -83,6 +83,24 @@ namespace lib4neuro {
                           double fi,
                           double fim);
 
+        /**
+         * Analyses direction of parameters change and performs the most feasible step in one parameter
+         * @param ef[in] error function to be optimized
+         * @param error_previous[in] evaluation of the error function on the @parameters_before state
+         * @param step_coefficient[in] scalar value denoting the scaling of the step in one direction
+         * @param direction[in] direction vector to be analyzed
+         * @param parameters_before[in] state of the parameter space before the analysis
+         * @param parameters_after[out] suggested state of the parameters after the analysis completes
+         * @return true if a feasible step was found, false otherwise
+         */
+        virtual bool perform_feasible_1D_step(
+                lib4neuro::ErrorFunction &ef,
+                double error_previous,
+                double step_coefficient,
+                std::vector<double> * direction,
+                std::vector<double> *parameters_before,
+                std::vector<double> *parameters_after
+                );
+
     public:
 
         /**
diff --git a/src/LearningMethods/GradientDescentBB.cpp b/src/LearningMethods/GradientDescentBB.cpp
new file mode 100644
index 00000000..3f86452e
--- /dev/null
+++ b/src/LearningMethods/GradientDescentBB.cpp
@@ -0,0 +1,229 @@
+/**
+ * DESCRIPTION OF THE FILE
+ *
+ * @author Michal Kravčenko
+ * @date 4.2.19 -
+ */
+
+#include "GradientDescentBB.h"
+#include "message.h"
+
+namespace lib4neuro {
+    GradientDescentBB::GradientDescentBB(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
+        this->tolerance = epsilon;
+        this->restart_frequency = n_to_restart;
+        this->optimal_parameters = new std::vector<double>(0);
+        this->maximum_niters = max_iters;
+        this->batch = batch;
+    }
+
+    GradientDescentBB::~GradientDescentBB() {
+        if (this->optimal_parameters) {
+            delete this->optimal_parameters;
+            this->optimal_parameters = nullptr;
+        }
+    }
+
+
+    void GradientDescentBB::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {
+
+        /* Copy data set max and min values, if it's normalized */
+        if(ef.get_dataset()->is_normalized()) {
+            ef.get_network_instance()->set_normalization_strategy_instance(
+                    ef.get_dataset()->get_normalization_strategy());
+        }
+
+        COUT_INFO("Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl);
+        COUT_INFO("Initial error: " << ef.eval() << std::endl);
+
+        if(ofs && ofs->is_open()) {
+            *ofs << "Finding a solution via a Gradient Descent method with adaptive step-length..." << std::endl;
+            *ofs << "Initial error: " << ef.eval() << std::endl;
+        }
+
+        double grad_norm = this->tolerance * 10.0, gamma, sx, beta;
+        double grad_norm_prev;
+        size_t i;
+        long long int iter_idx = this->maximum_niters;
+        size_t iter_counter = 0;
+
+        gamma = 1.0;
+        double prev_val, val = 0.0, c = 1.25, val_best;
+
+        size_t n_parameters = ef.get_dimension();
+
+
+        std::vector<double> *gradient_current = new std::vector<double>(n_parameters);
+        std::vector<double> *gradient_prev = new std::vector<double>(n_parameters);
+        std::vector<double> *params_current = ef.get_parameters();
+        std::vector<double> *params_prev = new std::vector<double>(n_parameters);
+        std::vector<double> *params_best = new std::vector<double>(*params_current);
+
+        std::vector<double> *ptr_mem;
+
+        double alpha = -1.0, cc, gg;
+        std::vector<double> dots( 3 );
+        double d1 = 0.0, d2 = 0.0, d3 = 0.0;
+
+
+        std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
+        std::fill(gradient_prev->begin(), gradient_prev->end(), 0.0);
+        val = ef.eval(params_current);
+        val_best = val;
+
+//        this-> batch = 0;
+        double cooling_factor = 1.0;
+        while (grad_norm > this->tolerance && (iter_idx != 0)) {
+            iter_idx--;
+            iter_counter++;
+            prev_val = val;
+            grad_norm_prev = grad_norm;
+
+            /* reset of the current gradient */
+            std::fill(gradient_current->begin(), gradient_current->end(), 0.0);
+//        std::fill(gradient_mem.begin(), gradient_mem.end(), 0.0);
+            ef.calculate_error_gradient(*params_current, *gradient_current, 1.0, this->batch);
+//        double error_analytical = this->calculate_gradient( ef.get_dataset()->get_data(), (size_t)2, params_current, gradient_current );
+
+//        for(size_t k = 0; k < gradient_mem.size(); ++k){
+//            printf("%f : %f\n", gradient_mem[ k ], gradient_current->at( k ));
+//        }
+//        printf("---------------------\n");
+
+            grad_norm = 0.0;
+            for (auto v: *gradient_current) {
+                grad_norm += v * v;
+                //COUT_DEBUG( grad_norm << std::endl );
+            }
+            grad_norm = std::sqrt(grad_norm);
+
+            /* Update of the parameters */
+            /* step length calculation */
+            if (iter_counter < 10 || iter_counter % this->restart_frequency < 10 ) {
+                /* fixed step length */
+                gamma = 0.1 * this->tolerance;
+//                gamma = 1 / grad_norm;
+//                gamma = 1e-4;
+                cooling_factor = 1.0;
+            } else {
+
+                std::fill( dots.begin( ), dots.end( ), 0.0 );
+                d1 = d2 = d3 = 0.0;
+
+                for ( int d = 0; d < gradient_current->size(); d++ ) {
+                    cc = params_current->at( d ) - params_prev->at( d );
+                    gg = gradient_current->at( d ) - gradient_prev->at( d );
+
+                    d1 += cc * cc;
+                    d2 += cc * gg;
+                    d3 += gg * gg;
+                }
+
+                dots[0] = d1;
+                dots[1] = d2;
+                dots[2] = d3;
+
+                gamma = 1;
+                if ( fabs( dots[1] ) > 0.0 ) {
+                    gamma = 0.25*( dots[0] / dots[1] );
+                }
+            }
+
+            for (i = 0; i < gradient_current->size(); ++i) {
+                (*params_prev)[i] = (*params_current)[i] - cooling_factor * gamma * (*gradient_current)[i];
+            }
+
+
+            /* switcheroo */
+            ptr_mem = gradient_prev;
+            gradient_prev = gradient_current;
+            gradient_current = ptr_mem;
+
+            ptr_mem = params_prev;
+            params_prev = params_current;
+            params_current = ptr_mem;
+
+            val = ef.eval(params_current);
+            if( val < val_best ){
+                val_best = val;
+
+                for(i = 0; i < gradient_current->size(); ++i){
+                    params_best->at( i ) = params_current->at( i );
+                }
+            }
+
+            COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
+                                                  << ". Step size: " << gamma*cooling_factor
+                                                  << ". C: " << c
+                                                  << ". Gradient norm: " << grad_norm
+                                                  << ". Total error: " << val << ". Lowest error so far: " << val_best
+                                                  << ".\r" );
+
+            WRITE_TO_OFS_DEBUG(ofs, "Iteration: " << (unsigned int)(iter_counter)
+                                                  << ". Step size: " << gamma*cooling_factor
+                                                  << ". C: " << c
+                                                  << ". Gradient norm: " << grad_norm
+                                                  << ". Total error: " << val << ". Lowest error so far: " << val_best
+                                                  << "." << std::endl);
+
+//            if(iter_counter % 100 == 0){
+//                COUT_INFO(std::string("Iteration: ") << (unsigned int)(iter_counter)
+//                                                      << ". Step size: " << gamma
+//                                                      << ". C: " << c
+//                                                      << ". Gradient norm: " << grad_norm
+//                                                      << ". Total error: " << val
+//                                                      << ".\r");
+//            }
+
+            cooling_factor *= 0.99999;
+
+        }
+        COUT_DEBUG(std::string("Iteration: ") << (unsigned int)(iter_counter)
+                                              << ". Step size: " << gamma*cooling_factor
+                                              << ". C: " << c
+                                              << ". Gradient norm: " << grad_norm
+                                              << ". Total error: " << val
+                                              << "." << std::endl);
+
+
+        if(iter_idx == 0) {
+            COUT_INFO(std::endl << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val_best << std::endl);
+
+            if(ofs && ofs->is_open()) {
+                *ofs << "Maximum number of iterations (" << this->maximum_niters << ") was reached! Final error: " << val_best << std::endl;
+
+            }
+
+        } else {
+            COUT_INFO(std::endl << "Gradient Descent method converged after "
+                                << this->maximum_niters - iter_idx
+                                << " iterations. Final error: " << val_best
+                                << std::endl);
+#ifdef L4N_DEBUG
+            if(ofs && ofs->is_open()) {
+                *ofs << "Gradient Descent method converged after "
+                     << this->maximum_niters-iter_idx
+                     << " iterations."
+                     << std::endl;
+            }
+#endif
+        }
+
+        *this->optimal_parameters = *params_best;
+
+
+//        ef.analyze_error_gradient(*params_current, *gradient_current, 1.0, this->batch);
+        ef.get_network_instance()->copy_parameter_space(this->optimal_parameters);
+
+        delete gradient_current;
+        delete gradient_prev;
+        delete params_current;
+        delete params_prev;
+        delete params_best;
+    }
+
+    std::vector<double> *GradientDescentBB::get_parameters() {
+        return this->optimal_parameters;
+    }
+
+}
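
The step length computed in GradientDescentBB above follows the Barzilai-Borwein (BB1) rule,
scaled by 0.25 and further damped by the cooling factor. With s_k = w_k - w_{k-1} and
y_k = g_k - g_{k-1} (the d1 and d2 accumulators in the loop):

    \gamma_k = 0.25 \, \frac{s_k^\top s_k}{s_k^\top y_k},

falling back to gamma_k = 1 when the denominator vanishes, and to the fixed step
0.1 * tolerance during the first 10 iterations and for the first 10 iterations after each
restart point (iter_counter % restart_frequency < 10).
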
diff --git a/src/LearningMethods/GradientDescentBB.h b/src/LearningMethods/GradientDescentBB.h
new file mode 100644
index 00000000..b93b469f
--- /dev/null
+++ b/src/LearningMethods/GradientDescentBB.h
@@ -0,0 +1,91 @@
+/**
+ * DESCRIPTION OF THE FILE
+ *
+ * @author Michal Kravčenko
+ * @date 4.2.19 -
+ */
+
+#ifndef LIB4NEURO_GRADIENTDESCENTBB_H
+#define LIB4NEURO_GRADIENTDESCENTBB_H
+
+
+#include "../settings.h"
+#include "../constants.h"
+#include "ILearningMethods.h"
+#include "../ErrorFunction/ErrorFunctions.h"
+
+namespace lib4neuro {
+    /**
+     *
+     */
+    class GradientDescentBB : public ILearningMethods {
+
+    private:
+
+        /**
+         * Threshold for the successful ending of the optimization - deviation from minima
+         */
+        double tolerance;
+
+        /**
+         *
+         */
+        double max_error;
+
+        /**
+         * Number of iterations to reset step size to tolerance/10.0
+         */
+        size_t restart_frequency;
+
+        /**
+         *
+         */
+        size_t batch;
+
+        /**
+         *
+         */
+        size_t iter_max;
+
+        /**
+         * Maximal number of iterations - optimization will stop after that, even if not converged
+         */
+        long long int maximum_niters;
+
+        /**
+         * Vector of minima coordinates
+         */
+        std::vector<double> *optimal_parameters;
+
+    public:
+
+        /**
+         * Creates an instance of Gradient Descent Optimizer (i.e. back-propagation)
+         * @param epsilon Threshold for the successful ending of the optimization - deviation from minima
+         * @param n_to_restart Number of iterations to reset step size to tolerance/10.0
+         * @param max_iters Maximal number of iterations - optimization will stop after that, even if not converged
+         */
+        LIB4NEURO_API explicit GradientDescentBB(double epsilon = 1e-3, size_t n_to_restart = 100, int max_iters = 1000, size_t batch = 0);
+
+        /**
+         * Deallocates the instance
+         */
+        LIB4NEURO_API ~GradientDescentBB();
+
+        /**
+         *
+         * @param ef
+         */
+        LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override;
+
+        /**
+         *
+         * @return
+         */
+        LIB4NEURO_API std::vector<double> *get_parameters() override;
+    };
+
+}
+
+
+#endif //LIB4NEURO_GRADIENTDESCENTBB_H
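
A minimal usage sketch of the new optimizer (the error function instance, here called mse, and
the network/data set it is bound to are assumed to exist already; they are not part of this
patch):

    #include "LearningMethods/GradientDescentBB.h"

    /* tolerance 1e-3, restart every 100 iterations, at most 50000 iterations, default batch */
    lib4neuro::GradientDescentBB gd_bb(1e-3, 100, 50000, 0);

    gd_bb.optimize(mse);                                    /* runs the BB-stepped descent   */
    std::vector<double> *best_w = gd_bb.get_parameters();   /* lowest-error parameters found */
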
diff --git a/src/LearningMethods/GradientDescentSingleItem.cpp b/src/LearningMethods/GradientDescentSingleItem.cpp
new file mode 100644
index 00000000..504ac99c
--- /dev/null
+++ b/src/LearningMethods/GradientDescentSingleItem.cpp
@@ -0,0 +1,106 @@
+/**
+ * DESCRIPTION OF THE FILE
+ *
+ * @author Michal Kravčenko
+ * @date 19.2.19 -
+ */
+
+#include "GradientDescentSingleItem.h"
+
+#include <random.hpp>
+#include "message.h"
+
+namespace lib4neuro {
+    GradientDescentSingleItem::GradientDescentSingleItem(double epsilon, size_t n_to_restart, int max_iters, size_t batch) {
+        this->tolerance = epsilon;
+        this->restart_frequency = n_to_restart;
+        this->optimal_parameters = new std::vector<double>(0);
+        this->maximum_niters = max_iters;
+        this->batch = batch;
+    }
+
+    GradientDescentSingleItem::~GradientDescentSingleItem() {
+        if (this->optimal_parameters) {
+            delete this->optimal_parameters;
+            this->optimal_parameters = nullptr;
+        }
+    }
+
+
+    double GradientDescentSingleItem::get_optimal_step_size(lib4neuro::ErrorFunction &f, std::vector<double> &x,
+                                                   std::vector<double> &d, size_t n_elems) {
+
+        /* initial step length; the loop below halves it until the error stops increasing */
+        double alpha = 1.0;
+        double value = f.eval();
+        double value_shifted = value + 1.0;
+
+
+        std::vector<double> shifted_x(x);
+        while( value_shifted > value ){
+            alpha *= 0.5;
+
+            for( size_t i = 0; i < x.size(); ++i ){
+                shifted_x[ i ] = x [ i ] - alpha * d[ i ];
+            }
+
+            value_shifted = f.eval( &shifted_x );
+        }
+//        std::cout << "Error reduction: " << value - value_shifted << std::endl;
+        return alpha;
+    }
+
+
+    void GradientDescentSingleItem::optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs) {
+
+        COUT_INFO("Finding a solution via a Gradient Descent [Single Item] method with adaptive step-length..." << std::endl);
+        COUT_INFO("Initial error: " << ef.eval() << std::endl);
+
+        size_t total_elements = ef.get_dataset()->get_n_elements(), updated_elements = 0, iter = 0;
+        double max_error = 1.0, error, gamma;
+        size_t iter_idx = this->maximum_niters;
+        size_t  dim = ef.get_network_instance()->get_n_biases() + ef.get_network_instance()->get_n_weights();
+
+        std::vector<double> parameter_vector = *ef.get_parameters();
+        std::vector<double> gradient_vector(dim);
+        std::vector<double> search_direction(dim);
+        std::vector<double> error_vector(ef.get_network_instance()->get_n_outputs());
+        while( max_error >= this->tolerance && iter_idx >= 1 ){
+            iter_idx--;
+            iter++;
+
+            max_error = 0.0;
+            updated_elements = 0;
+            std::fill(search_direction.begin(), search_direction.end(), 0);
+            for( size_t i = 0; i < ef.get_dataset()->get_n_elements(); ++i){
+                error = ef.eval_single_item_by_idx( i, &parameter_vector, error_vector );
+
+                if( error > max_error ){
+                    max_error = error;
+                }
+
+                if( error > this->tolerance ){
+                    updated_elements++;
+                    ef.calculate_error_gradient_single(error_vector, gradient_vector);
+
+                    for(size_t j = 0; j < dim; ++j ){
+                        search_direction[ j ] += gradient_vector[ j ];
+                    }
+                }
+            }
+            gamma = this->get_optimal_step_size(ef, parameter_vector, search_direction, updated_elements);
+
+            for( size_t j = 0; j < dim; ++j ){
+                parameter_vector[ j ] -= gamma * search_direction[ j ];
+            }
+
+            COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << "\r");
+        }
+        COUT_DEBUG("Iteration: " << iter << ", Total elements in train set: " << total_elements << ", # of elements with high error: " << updated_elements << ", max. error: " << max_error << std::endl);
+    }
+
+    std::vector<double> *GradientDescentSingleItem::get_parameters() {
+        return this->optimal_parameters;
+    }
+
+}
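
In each pass of the loop above, the search direction accumulates the gradients of the items
whose error still exceeds the tolerance, and the step length comes from the halving search in
get_optimal_step_size. With e_i(w) the error of the i-th training item:

    d_k = \sum_{i:\, e_i(w_k) > \varepsilon} \nabla e_i(w_k), \qquad w_{k+1} = w_k - \gamma_k \, d_k,

where gamma_k is obtained by repeatedly halving an initial step of 1 until the error at the
shifted parameters no longer exceeds the current error.
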
diff --git a/src/LearningMethods/GradientDescentSingleItem.h b/src/LearningMethods/GradientDescentSingleItem.h
new file mode 100644
index 00000000..73ebf17b
--- /dev/null
+++ b/src/LearningMethods/GradientDescentSingleItem.h
@@ -0,0 +1,106 @@
+/**
+ * DESCRIPTION OF THE FILE
+ *
+ * @author Michal Kravčenko
+ * @date 19.2.19 -
+ */
+
+#ifndef LIB4NEURO_GRADIENTDESCENTSINGLEITEM_H
+#define LIB4NEURO_GRADIENTDESCENTSINGLEITEM_H
+
+
+#include "../settings.h"
+#include "../constants.h"
+#include "ILearningMethods.h"
+#include "../ErrorFunction/ErrorFunctions.h"
+#include "GradientDescentBB.h"
+
+namespace lib4neuro {
+    /**
+     *
+     */
+    class GradientDescentSingleItem : public ILearningMethods {
+
+    private:
+
+        /**
+         * Threshold for the successful ending of the optimization - deviation from minima
+         */
+        double tolerance;
+
+        /**
+         *
+         */
+        double max_error;
+
+        /**
+         * Number of iterations to reset step size to tolerance/10.0
+         */
+        size_t restart_frequency;
+
+        /**
+         *
+         */
+        size_t batch;
+
+        /**
+         *
+         */
+        size_t iter_max;
+
+        /**
+         * Maximal number of iterations - optimization will stop after that, even if not converged
+         */
+        long long int maximum_niters;
+
+        /**
+         * Vector of minima coordinates
+         */
+        std::vector<double> *optimal_parameters;
+
+
+    protected:
+
+        /**
+         * Finds a suitable step length in direction @p d from position @p x of the error function @p f
+         * @param f error function to be minimized
+         * @param x current state of the parameter space
+         * @param d search direction
+         * @param n_elems number of training items that contributed to the direction
+         * @return step length found by repeated halving
+         */
+        virtual double get_optimal_step_size(lib4neuro::ErrorFunction &f, std::vector<double> &x, std::vector<double> &d, size_t n_elems);
+
+
+    public:
+
+        /**
+         * Creates an instance of Gradient Descent Optimizer (i.e. back-propagation)
+         * @param epsilon Threshold for the successful ending of the optimization - deviation from minima
+         * @param n_to_restart Number of iterations to reset step size to tolerance/10.0
+         * @param max_iters Maximal number of iterations - optimization will stop after that, even if not converged
+         */
+        LIB4NEURO_API explicit GradientDescentSingleItem(double epsilon = 1e-3, size_t n_to_restart = 100, int max_iters = 1000, size_t batch = 0);
+
+        /**
+         * Deallocates the instance
+         */
+        LIB4NEURO_API ~GradientDescentSingleItem();
+
+        /**
+         *
+         * @param ef
+         */
+        LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override;
+
+        /**
+         *
+         * @return
+         */
+        LIB4NEURO_API std::vector<double> *get_parameters() override;
+    };
+
+}
+
+
+#endif //LIB4NEURO_GRADIENTDESCENTSINGLEITEM_H
diff --git a/src/LearningMethods/ILearningMethods.cpp b/src/LearningMethods/ILearningMethods.cpp
index d0bb4b1c..6aa47daf 100644
--- a/src/LearningMethods/ILearningMethods.cpp
+++ b/src/LearningMethods/ILearningMethods.cpp
@@ -5,4 +5,5 @@
  * @date 10.9.18 -
  */
 
-#include "ILearningMethods.h"
\ No newline at end of file
+#include "ILearningMethods.h"
+
diff --git a/src/LearningMethods/ILearningMethods.h b/src/LearningMethods/ILearningMethods.h
index d0009c9b..80c1a939 100644
--- a/src/LearningMethods/ILearningMethods.h
+++ b/src/LearningMethods/ILearningMethods.h
@@ -27,13 +27,15 @@ private:
      */
     lib4neuro::ErrorFunction *ef = nullptr;
 
+
+
 public:
     /**
      * Runs the method specific learning algorithm minimizing the given error function
      */
     virtual void optimize( lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr ) = 0;
 
-    /**
+    /**
      * Updates the optimal weight&bias settings in the passed vector
      */
     virtual std::vector<double>* get_parameters( ) = 0;
diff --git a/src/LearningMethods/LearningSequence.cpp b/src/LearningMethods/LearningSequence.cpp
new file mode 100644
index 00000000..c0b786ea
--- /dev/null
+++ b/src/LearningMethods/LearningSequence.cpp
@@ -0,0 +1,61 @@
+/**
+ * DESCRIPTION OF THE FILE
+ *
+ * @author Michal Kravčenko
+ * @date 19.2.19 -
+ */
+
+#include "LearningSequence.h"
+
+namespace lib4neuro {
+
+    LearningSequence::LearningSequence( double tolerance, int max_n_cycles ){
+        this->tol = tolerance;
+        this->max_number_of_cycles = max_n_cycles;
+        this->best_parameters = new std::vector<double>();
+    }
+
+    LearningSequence::~LearningSequence() {
+        if( this->best_parameters ){
+            delete this->best_parameters;
+            this->best_parameters = nullptr;
+        }
+    }
+
+    std::vector<double>* LearningSequence::get_parameters() {
+        if( this->learning_sequence.size() > 0 ){
+            return this->learning_sequence[0]->get_parameters( );
+        }
+        return nullptr;
+    }
+
+    void LearningSequence::add_learning_method(ILearningMethods *method) {
+        this->learning_sequence.push_back( method );
+    }
+
+    void LearningSequence::optimize(lib4neuro::ErrorFunction &ef, std::ofstream *ofs) {
+
+        double error = ef.eval();
+        double the_best_error = error;
+        int mcycles = this->max_number_of_cycles, cycle_idx = 0;
+
+        while( error > this->tol && mcycles != 0){
+            mcycles--;
+            cycle_idx++;
+
+            for( auto m: this->learning_sequence ){
+                m->optimize( ef, ofs );
+                error = ef.eval();
+
+                if( error < the_best_error ){
+                    the_best_error = error;
+                    *this->best_parameters = *ef.get_parameters();
+                }
+
+                if( error <= this->tol ){
+                    ef.get_network_instance()->copy_parameter_space( this->best_parameters );
+                    return;
+                }
+            }
+            COUT_DEBUG( "Cycle: " << cycle_idx << ", the lowest error: " << the_best_error << std::endl );
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/LearningMethods/LearningSequence.h b/src/LearningMethods/LearningSequence.h
new file mode 100644
index 00000000..ef38950b
--- /dev/null
+++ b/src/LearningMethods/LearningSequence.h
@@ -0,0 +1,80 @@
+/**
+ * DESCRIPTION OF THE FILE
+ *
+ * @author Michal Kravčenko
+ * @date 19.2.19 -
+ */
+
+#ifndef LIB4NEURO_LEARNINGSEQUENCE_H
+#define LIB4NEURO_LEARNINGSEQUENCE_H
+
+#include <4neuro.h>
+#include "../settings.h"
+#include "../constants.h"
+#include "ILearningMethods.h"
+
+namespace lib4neuro {
+    /**
+     *
+     */
+    class LearningSequence : public ILearningMethods {
+
+    private:
+
+        /**
+         *
+         */
+        std::vector<ILearningMethods*> learning_sequence;
+
+        /**
+         *
+         */
+        double tol;
+
+        /**
+         *
+         */
+        std::vector<double> *best_parameters = nullptr;
+
+        /**
+         *
+         */
+        int max_number_of_cycles = -1;
+
+
+    public:
+
+        /**
+         *
+         */
+        LIB4NEURO_API explicit LearningSequence( double tolerance = 1e-6, int max_n_cycles = -1);
+
+        /**
+         * Deallocates the instance
+         */
+        LIB4NEURO_API ~LearningSequence();
+
+        /**
+         *
+         * @param ef
+         * @param ofs
+         */
+        LIB4NEURO_API void optimize(lib4neuro::ErrorFunction &ef, std::ofstream* ofs = nullptr) override;
+
+        /**
+         *
+         * @return
+         */
+        LIB4NEURO_API std::vector<double> *get_parameters() override;
+
+        /**
+         *
+         * @param method
+         */
+        LIB4NEURO_API void add_learning_method( ILearningMethods * method );
+    };
+
+}
+
+
+#endif //LIB4NEURO_LEARNINGSEQUENCE_H
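
A minimal usage sketch of the new sequence optimizer, cycling through two of the methods added
by this patch (the error function instance, here called mse, and its underlying network/data
set are assumed to exist already; they are not part of this patch):

    #include "LearningMethods/LearningSequence.h"
    #include "LearningMethods/GradientDescentBB.h"
    #include "LearningMethods/GradientDescentSingleItem.h"

    lib4neuro::GradientDescentBB         gd_bb(1e-4, 100, 10000);
    lib4neuro::GradientDescentSingleItem gd_si(1e-4, 100, 1000);

    /* run the two methods in turn, for at most 5 cycles or until the error drops below 1e-4 */
    lib4neuro::LearningSequence seq(1e-4, 5);
    seq.add_learning_method(&gd_bb);
    seq.add_learning_method(&gd_si);
    seq.optimize(mse);
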
diff --git a/src/LearningMethods/ParticleSwarm.cpp b/src/LearningMethods/ParticleSwarm.cpp
index a7698056..e8c3154d 100644
--- a/src/LearningMethods/ParticleSwarm.cpp
+++ b/src/LearningMethods/ParticleSwarm.cpp
@@ -307,7 +307,7 @@ namespace lib4neuro {
 //    for(unsigned int i = 0; i < this->n_particles; ++i){
 //        this->particle_swarm[i]->print_coordinate();
 //    }
-        printf("Initial best value: %10.8f\n", optimal_value);
+        COUT_INFO("Initial best value: " << optimal_value << std::endl);
 
         while (outer_it < this->iter_max) {
             max_velocity = 0;
@@ -402,12 +402,10 @@ namespace lib4neuro {
         this->determine_optimal_coordinate_and_value(*this->p_min_glob, optimal_value);
         if (outer_it < this->iter_max) {
             /* Convergence reached */
-            printf("\nFound optimum in %d iterations. Objective function value: %10.8f\n", (int) outer_it,
-                   optimal_value);
+            COUT_INFO( std::endl << "Found optimum in "  <<  outer_it << " iterations. Objective function value: " << optimal_value << std::endl);
         } else {
             /* Maximal number of iterations reached */
-            printf("\nMax number of iterations reached (%d)!  Objective function value: %10.8f\n", (int) outer_it,
-                   optimal_value);
+            COUT_INFO( std::endl << "Max number of iterations reached ("  <<  outer_it << ")!  Objective function value: " << optimal_value <<std:: endl);
         }
 //    for (size_t i = 0; i <= this->func_dim - 1; ++i) {
 //        printf("%10.8f \n", (*this->p_min_glob)[i]);
-- 
GitLab