/**
 * @file LevenbergMarquardt.cpp
 */
#include <cmath>

#include <armadillo>
    
    #include "LevenbergMarquardt.h"
    #include "../message.h"
    
    struct lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl {
    /**
     * Threshold for the successful ending of the optimization - deviation from minima
     */
        double tolerance;
    
        double tolerance_gradient;
    
        double tolerance_parameters;
    
        double LM_step_acceptance_threshold;
    
        double lambda_initial;
    
        double lambda_increase;
    
        double lambda_decrease;
    
    /**
     * Maximal number of iterations - optimization will stop after that, even if not converged
     */
    size_t maximum_niters;

    /**
     * Number of data points used in each batch (0 means the full data set)
     */
    size_t batch_size;

    /**
     * Returns the Jacobian matrix of the residual function computed via the backpropagation algorithm
     * and the right-hand side of the resulting system of equations related to data errors in @data
     * and the approximating function @f
     * @param ef
     * @param J
     * @param rhs
     * @param data_subset_size
     */
    void get_jacobian_and_rhs(lib4neuro::ErrorFunction& ef,
                              arma::Mat<double>& J,
                              arma::Col<double>& rhs,
                              size_t data_subset_size);
};
    
    void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_jacobian_and_rhs(
    
        lib4neuro::ErrorFunction& ef,
        arma::Mat<double>& J,
        arma::Col<double>& rhs,
        size_t data_subset_size) {
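    /* Assembles the residual Jacobian and the right-hand side of the normal equations
     * from the error function (optionally on a random training subset) and copies them
     * into the Armadillo structures used by the solver. */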
    
    std::vector<std::vector<double>> jacobian;
    std::vector<double>              rhs_vec;
    
    if (data_subset_size <= 0) {
        data_subset_size = ef.get_n_data_set();
    }

    if (data_subset_size < ef.get_n_data_set()) {
        ef.divide_data_train_test((double) data_subset_size / (double) ef.get_n_data_set());
    }
    
        ef.get_jacobian_and_rhs(jacobian,
                                rhs_vec);
    
        if (data_subset_size < ef.get_n_data_set()) {
    
            ef.return_full_data_set_for_training();
        }
    
        size_t dim_out      = jacobian.size();
    
        size_t n_parameters = rhs_vec.size();
    
    
    J.reshape(dim_out,
              n_parameters);
    rhs.resize(n_parameters);
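    /* Element-wise copy below: the error function returns a row-major
     * std::vector<std::vector<double>>, while Armadillo stores matrices column-major. */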
    
    for (size_t ri = 0; ri < jacobian.size(); ++ri) {
        for (size_t ci = 0; ci < n_parameters; ++ci) {
            J.at(ri,
                 ci) = jacobian[ri][ci];
        }
    }

    for (size_t ci = 0; ci < n_parameters; ++ci) {
        rhs.at(ci) = rhs_vec.at(ci);
    }
}
    
namespace lib4neuro {
    LevenbergMarquardt::LevenbergMarquardt(size_t max_iters,
                                           size_t bs,
                                           double tolerance,
                                           double tolerance_gradient,
                                           double tolerance_parameters,
                                           double LM_step_acceptance_threshold,
                                           double lambda_initial,
                                           double lambda_increase,
                                           double lambda_decrease) : p_impl(new LevenbergMarquardtImpl()) {
    
    
            this->p_impl->batch_size                   = bs;
            this->p_impl->tolerance                    = tolerance;
            this->p_impl->tolerance_gradient           = tolerance_gradient;
            this->p_impl->tolerance_parameters         = tolerance_parameters;
    
            this->p_impl->LM_step_acceptance_threshold = LM_step_acceptance_threshold;
    
            this->p_impl->lambda_initial               = lambda_initial;
            this->p_impl->lambda_increase              = lambda_increase;
            this->p_impl->lambda_decrease              = lambda_decrease;
        this->p_impl->maximum_niters               = max_iters;
    }

    void LevenbergMarquardt::optimize(lib4neuro::ErrorFunction& ef,
                                      std::ofstream* ofs) {
        optimize(ef,
                 LM_UPDATE_TYPE::MARQUARDT,
                 ofs);
    }
    
    void LevenbergMarquardt::optimize(lib4neuro::ErrorFunction& ef,
                                      lib4neuro::LM_UPDATE_TYPE update_type,
                                      std::ofstream* ofs) {

        double current_err = ef.eval();
    
    
            COUT_INFO(
    
                "Finding a solution via a Levenberg-Marquardt method... Starting error: " << current_err << std::endl);
    
        if (ofs && ofs->is_open()) {
            *ofs << "Finding a solution via a Levenberg-Marquardt method... Starting error: " << current_err
                 << std::endl;
        }
            size_t n_parameters  = ef.get_dimension();
    
            size_t n_data_points = ef.get_n_data_set();
    
            if (this->p_impl->batch_size > 0) {
    
                n_data_points = this->p_impl->batch_size;
            }
    
            std::vector<double>* params_current = new std::vector<double>(ef.get_parameters());
    
    
            std::shared_ptr<std::vector<double>> params_tmp;
            params_tmp.reset(new std::vector<double>(n_parameters));
    
            arma::Mat<double> J(n_data_points,
                                n_parameters);  // Jacobian matrix
            arma::Mat<double> H(n_data_points,
                                n_parameters);  // Hessian matrix
            arma::Mat<double> H_new(n_data_points,
                                    n_parameters);
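        /* Note: the assignment H = J.t() * J below resizes H (and subsequently H_new)
         * to n_parameters x n_parameters; the dimensions here are initial placeholders. */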
    
        double lambda   = this->p_impl->lambda_initial;  // Damping parameter
    
            double prev_err = 0, update_norm = 0, gradient_norm = 0, mem_double = 0, jacobian_norm = 1;
    
            bool                update_J = true;
            arma::Col<double>   update;
            arma::Col<double>   rhs;
    
            std::vector<double> d_prep(n_data_points);
    
            arma::Col<double>   d;
    
            //-------------------//
            // Solver iterations //
            //-------------------//
    
        size_t iter_counter = 0;
        do {
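            /* Each pass either recomputes J, H and the norms (after an accepted step)
             * or reuses them with an increased damping factor (after a rejected step). */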
    
            if (update_J) {
                /* Get the Jacobian matrix and the right-hand side vector */
                this->p_impl->get_jacobian_and_rhs(ef,
                                                   J,
                                                   rhs,
                                                   this->p_impl->batch_size);

                gradient_norm = 0;

                // TODO parallelize with OpenMP?
    
                    for (size_t ci = 0; ci < n_parameters; ++ci) {
                        mem_double = rhs[ci];
    
                        mem_double *= mem_double;
                        gradient_norm += mem_double;
                    }
    
                gradient_norm  = std::sqrt(gradient_norm) / J.n_rows;

                /* Gauss-Newton approximation of the Hessian: H = J' * J */
                H = J.t() * J;

                jacobian_norm = 0;
                for (size_t ri = 0; ri < n_parameters; ++ri) {
                    for (size_t ci = 0; ci < n_parameters; ++ci) {
                        jacobian_norm += H.at(ri,
                                              ci) * H.at(ri,
                                                         ci);
                    }
                }
                jacobian_norm  = std::sqrt(jacobian_norm);
    
                /* Evaluate the error before updating parameters */
                prev_err = ef.eval();
            }

            /* H_new = H + lambda*I */
    
                H_new = H + lambda * arma::eye(n_parameters,
                                               n_parameters);
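            /* The damping term interpolates between methods: a large lambda makes the
             * step behave like (scaled) gradient descent, a small lambda like Gauss-Newton. */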
    
                update = arma::solve(H_new,
                                     rhs);
    
    
            /* Compute the error after the parameter update */
            update_norm = 0.0;
            for (size_t i = 0; i < n_parameters; i++) {
                params_tmp->at(i) = params_current->at(i) + update.at(i);
                update_norm += update.at(i) * update.at(i);
            }
    
                update_norm   = std::sqrt(update_norm);
                current_err   = ef.eval(params_tmp.get());
    
    
            /* Check whether the parameter update improved the error */
            if (current_err < prev_err) {
    
    
                    /* If the convergence threshold is achieved, finish the computation */
    
                    if (current_err < this->p_impl->tolerance) {
    
                        break;
                    }
    
                    /* If the error is lowered after parameter update, accept the new parameters and lower the damping
                     * factor lambda */
    
                    //TODO rewrite without division!
                    lambda /= this->p_impl->lambda_decrease;
    
    
                for (size_t i = 0; i < n_parameters; i++) {
                    params_current->at(i) = params_tmp->at(i);
                }
    
                    prev_err = current_err;
                    update_J = true;
    
    
                } else {
                    /* If the error after parameters update is not lower, increase the damping factor lambda */
                    update_J = false;
                    lambda *= this->p_impl->lambda_increase;
                }
    
                COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
                                         << gradient_norm << ", Direction norm: " << update_norm << "\r");
    
                if (ofs && ofs->is_open()) {
                    *ofs << "Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
                         << gradient_norm << ", Direction norm: " << update_norm << std::endl;
                }
    
            } while (iter_counter++ < this->p_impl->maximum_niters && (update_norm > this->p_impl->tolerance));
            COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
                                     << gradient_norm << ", Direction norm: " << update_norm << std::endl);
    
            if (ofs && ofs->is_open()) {
                *ofs << "Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
                     << gradient_norm << ", Direction norm: " << update_norm << std::endl;
            }
    
            /* Store the optimized parameters */
    
            this->optimal_parameters = *params_current;
    
            if (params_current) {
    
                delete params_current;
                params_current = nullptr;
            }
    
    
            ef.set_parameters(this->optimal_parameters);
    
    		COUT_INFO("Finished in " << iter_counter << " iterations with error: " << current_err << " and gradient norm: " << gradient_norm << std::endl);
    
        LevenbergMarquardt::~LevenbergMarquardt() = default;
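
/*
 * Usage sketch (hypothetical driver code, not part of the library): `err` stands
 * for any concrete lib4neuro::ErrorFunction instance built elsewhere; the argument
 * values are illustrative only.
 *
 *   lib4neuro::LevenbergMarquardt lm(1000,   // max_iters
 *                                    0,      // batch size (0 = full data set)
 *                                    1e-6,   // tolerance
 *                                    1e-6,   // tolerance_gradient
 *                                    1e-6,   // tolerance_parameters
 *                                    1e-2,   // LM_step_acceptance_threshold
 *                                    1e-2,   // lambda_initial
 *                                    11.0,   // lambda_increase
 *                                    9.0);   // lambda_decrease
 *
 *   lm.optimize(err, nullptr);  // nullptr: no log file stream
 *   // optimize() writes the optimized parameters back via err.set_parameters()
 */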