Something went wrong on our end
-
kra568 authored
[ENH] fixed compilation, utilization of Salomon modules, inclusion of PETSc framework in the parallel version of levenberg-marquardt algorithm, minor fixes of the acsf example and run scripts
kra568 authored[ENH] fixed compilation, utilization of Salomon modules, inclusion of PETSc framework in the parallel version of levenberg-marquardt algorithm, minor fixes of the acsf example and run scripts
LevenbergMarquardt.cpp 18.53 KiB
#define ARMA_ALLOW_FAKE_GCC
#include <armadillo>
#include <mpi.h>
#ifdef WITH_PETSC
#include <petscmat.h>
#include <petscvec.h>
#include <petscsys.h>
#include <petscerror.h>
#include <petscviewer.h>
#include <petscksp.h>
#endif
#include "../mpi_wrapper.h"
#include "LevenbergMarquardt.h"
#include "../message.h"
struct lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl {
/**
* Threshold for the successful ending of the optimization - deviation from minima
*/
double tolerance;
double tolerance_gradient;
double tolerance_parameters;
double LM_step_acceptance_threshold;
double lambda_initial;
double lambda_increase;
double lambda_decrease;
/**
* Maximal number of iterations - optimization will stop after that, even if not converged
*/
size_t maximum_niters;
size_t batch_size;
/**
* Returns Jacobian matrix of the residual function using the backpropagation algorithm
* Returns the right hand side of the resulting system of equations related to data errors in @data and approximating function @f
* @param f
* @param J
* @param rhs
* @param data
*/
void get_jacobian_and_rhs(lib4neuro::ErrorFunction& ef,
arma::Mat<double>& H,
arma::Col<double>& rhs,
size_t data_subset_size);
size_t solve_system_CG(
const arma::Mat<double>& A,
const arma::Col<double>& rhs,
arma::Col<double>& sol
);
};
//*****************************************************************
// Iterative template routine -- GMRES
//
// GMRES solves the unsymmetric linear system Ax = b using the
// Generalized Minimum Residual method
//
// GMRES follows the algorithm described on p. 20 of the
// SIAM Templates book.
//
// The return value indicates convergence within max_iter (input)
// iterations (0), or no convergence within max_iter iterations (1).
//
// Upon successful return, output arguments have the following values:
//
// x -- approximate solution to Ax = b
// max_iter -- the number of iterations performed before the
// tolerance was reached
// tol -- the residual after the final iteration
//
//*****************************************************************
size_t lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::solve_system_CG(
const arma::Mat<double>& A,
const arma::Col<double>& rhs,
arma::Col<double>& sol
){
const double zero = 1e-32;
const double prec = 1e-6;
const auto n = rhs.n_elem;
size_t iter = 1;
if( sol.n_elem != n ){
sol.resize( n );
}
arma::Col<double> r( n );
r = rhs - A * sol;
double norm_r = arma::dot(r, r);
double norm_r_prev;
if ( norm_r < zero ) {
return 0;
}
arma::Col<double> p( n );
p = r;
arma::Col<double> Ap( n );
double alpha, beta;
while( true ){
++iter;
norm_r_prev = norm_r;
Ap = A * p;
alpha = norm_r_prev / arma::dot(p, Ap);
sol += alpha * p;
r -= alpha * Ap;
norm_r = arma::dot(r, r);
if ( norm_r < zero || iter >= n) {
break;
}
beta = norm_r / norm_r_prev;
p = r + beta * p;
}
return iter;
}
void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_jacobian_and_rhs(
lib4neuro::ErrorFunction& ef,
arma::Mat<double>& H,
arma::Col<double>& rhs,
size_t data_subset_size) {
if (data_subset_size <= 0) {
data_subset_size = ef.get_n_data_set();
}
if (data_subset_size < ef.get_n_data_set()) {
ef.divide_data_train_test((double) data_subset_size / (double) ef.get_n_data_set());
}
size_t n_parameters = ef.get_dimension( );
if( H.n_rows != n_parameters || H.n_cols != n_parameters ){
H.reshape( n_parameters, n_parameters );
}
if( rhs.n_elem != n_parameters ){
rhs.resize( n_parameters );
}
H.fill(0.0);
rhs.fill(0.0);
for( size_t entry_idx = 0; entry_idx < ef.get_n_data_set( ); ++entry_idx ){
ef.add_to_hessian_and_rhs_single( H, rhs, entry_idx );
}
if (data_subset_size < ef.get_n_data_set()) {
ef.return_full_data_set_for_training();
}
}
namespace lib4neuro {
LevenbergMarquardt::LevenbergMarquardt(size_t max_iters,
size_t bs,
double tolerance,
double tolerance_gradient,
double tolerance_parameters,
double LM_step_acceptance_threshold,
double lambda_initial,
double lambda_increase,
double lambda_decrease) : p_impl(new LevenbergMarquardtImpl()) {
this->p_impl->batch_size = bs;
this->p_impl->tolerance = tolerance;
this->p_impl->tolerance_gradient = tolerance_gradient;
this->p_impl->tolerance_parameters = tolerance_parameters;
this->p_impl->LM_step_acceptance_threshold = LM_step_acceptance_threshold;
this->p_impl->lambda_initial = lambda_initial;
this->p_impl->lambda_increase = lambda_increase;
this->p_impl->lambda_decrease = lambda_decrease;
this->p_impl->maximum_niters = max_iters;
}
void LevenbergMarquardt::optimize(lib4neuro::ErrorFunction& ef,
std::ofstream* ofs) {
optimize(ef,
LM_UPDATE_TYPE::MARQUARDT,
ofs);
}
void LevenbergMarquardt::optimize(lib4neuro::ErrorFunction& ef,
lib4neuro::LM_UPDATE_TYPE update_type,
std::ofstream* ofs) {
double current_err = ef.eval( );
COUT_INFO(
"Finding a solution via the Levenberg-Marquardt method... Starting error: " << current_err);
if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
*ofs << "Finding a solution via a Levenberg-Marquardt method... Starting error: " << current_err;
}
size_t n_parameters = ef.get_dimension();
size_t n_data_points = ef.get_n_data_set();
if (this->p_impl->batch_size > 0) {
n_data_points = this->p_impl->batch_size;
}
std::vector<double>* params_current = new std::vector<double>(ef.get_parameters());
double lambda = this->p_impl->lambda_initial; // Dumping parameter
std::shared_ptr<std::vector<double>> params_tmp;
params_tmp.reset(new std::vector<double>(n_parameters));
arma::Mat<double> H(n_parameters,
n_parameters); // Hessian matrix
double prev_err = 0, update_norm = 0, gradient_norm = 0, mem_double = 0, jacobian_norm = 1;
bool update_J = true;
arma::Col<double> rhs;
arma::Col<double> d;
double slowdown_coeff = 0.25;
//-------------------//
// Solver iterations //
//-------------------//
size_t iter_counter = 0;
size_t update_counter = 0;
double update_time = 0.0;
double update_sync_rhs = 0.0;
double update_sync_H = 0.0;
double compute_time = 0.0;
double compute_sync = 0.0;
double total_time = -MPI_Wtime( );
size_t ninner_iters = 0;
#ifdef WITH_PETSC
/* Prepare PETSc */
Mat H_PETSc, H_PETSc_new;
Vec RHS_PETSc, lambda_vec, mpi_vec; //TODO maybe use just one vec. instead of lambda_vec and mpi_vec?
PetscInt Istart,Iend;
PetscErrorCode ierr;
KSP ksp; /* KSP solver context */
PC pc; /* Preconditioner */
VecScatter scatter;
PetscInt* inds;
PetscScalar* all_vals;
PetscMalloc1(n_parameters, &inds);
PetscMalloc1(n_parameters, &all_vals);
for(PetscInt i = 0; i < n_parameters; i++) {
inds[i] = i;
}
ierr = VecCreate(PETSC_COMM_WORLD, &RHS_PETSc); CHKERRXX(ierr);
// ierr = VecSetType(RHS_PETSc, VECSTANDARD); CHKERRXX(ierr);
ierr = VecSetSizes(RHS_PETSc, PETSC_DECIDE, n_parameters); CHKERRXX(ierr);
ierr = VecSetFromOptions(RHS_PETSc); CHKERRXX(ierr);
// ierr = VecSetUp(RHS_PETSc); CHKERRXX(ierr);
ierr = VecCreate(PETSC_COMM_WORLD, &lambda_vec); CHKERRXX(ierr);
ierr = VecSetSizes(lambda_vec, PETSC_DECIDE, n_parameters); CHKERRXX(ierr);
ierr = VecSetFromOptions(lambda_vec); CHKERRXX(ierr);
ierr = VecCreate(PETSC_COMM_WORLD, &lambda_vec); CHKERRXX(ierr);
ierr = VecSetSizes(lambda_vec, PETSC_DECIDE, n_parameters); CHKERRXX(ierr);
ierr = VecSetFromOptions(lambda_vec); CHKERRXX(ierr);
ierr = VecCreate(PETSC_COMM_WORLD, &mpi_vec); CHKERRXX(ierr);
ierr = VecSetSizes(mpi_vec, PETSC_DECIDE, n_parameters); CHKERRXX(ierr);
ierr = VecSetFromOptions(mpi_vec); CHKERRXX(ierr);
// ierr = MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, n_parameters, n_parameters, nullptr, &H_PETSc); CHKERRXX(ierr);
ierr = MatCreate(PETSC_COMM_WORLD, &H_PETSc); CHKERRXX(ierr);
ierr = MatSetType(H_PETSc, MATMPIDENSE);
ierr = MatSetSizes(H_PETSc, PETSC_DECIDE, PETSC_DECIDE, n_parameters, n_parameters); CHKERRXX(ierr);
ierr = MatSetUp(H_PETSc); CHKERRXX(ierr);
ierr = MatSetOption(H_PETSc, MAT_SYMMETRIC,PETSC_TRUE); CHKERRXX(ierr);
ierr = MatGetOwnershipRange(H_PETSc, &Istart, &Iend); CHKERRXX(ierr);
#else
arma::Mat<double> H_new(n_parameters, n_parameters);
arma::Col<double> update;
#endif
do {
iter_counter++;
// COUT_INFO("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
// << gradient_norm << ", Direction norm: " << update_norm << "\r");
if (update_J) {
update_counter++;
/* Get Jacobian matrix */
update_time -= MPI_Wtime( );
this->p_impl->get_jacobian_and_rhs(ef,
H,
rhs,
this->p_impl->batch_size);
update_time += MPI_Wtime( );
update_sync_rhs -= MPI_Wtime( );
#ifdef WITH_PETSC
ierr = VecSet(RHS_PETSc, 0); CHKERRXX(ierr);
ierr = VecSetValues(RHS_PETSc, n_parameters, inds, static_cast<PetscScalar*>(rhs.memptr()), ADD_VALUES); CHKERRXX(ierr);
ierr = VecAssemblyBegin(RHS_PETSc); CHKERRXX(ierr);
ierr = VecAssemblyEnd(RHS_PETSc); CHKERRXX(ierr);
// ierr = VecView(RHS_PETSc, PETSC_VIEWER_STDOUT_WORLD); CHKERRXX(ierr);
#else
MPI_Allreduce(
MPI_IN_PLACE,
rhs.begin(),
rhs.n_elem,
MPI_DOUBLE,
MPI_SUM,
lib4neuro::mpi_active_comm
);
#endif
update_sync_rhs += MPI_Wtime( );
update_sync_H -= MPI_Wtime( );
#ifdef WITH_PETSC
ierr = MatZeroEntries(H_PETSc); CHKERRXX(ierr);
ierr = MatSetValues(H_PETSc, n_parameters, inds, n_parameters, inds, static_cast<PetscScalar*>(H.memptr()), ADD_VALUES); CHKERRXX(ierr);
ierr = MatAssemblyBegin(H_PETSc, MAT_FINAL_ASSEMBLY); CHKERRXX(ierr);
ierr = MatAssemblyEnd(H_PETSc, MAT_FINAL_ASSEMBLY); CHKERRXX(ierr);
// ierr = MatView(H_PETSc, PETSC_VIEWER_STDOUT_WORLD); CHKERRXX(ierr);
#else
MPI_Allreduce(
MPI_IN_PLACE,
H.begin(),
H.n_elem,
MPI_DOUBLE,
MPI_SUM,
lib4neuro::mpi_active_comm
);
#endif
update_sync_H += MPI_Wtime( );
// if(lib4neuro::mpi_rank == 0) {
// rhs.print();
// H.print();
// }
gradient_norm = 0;
// TODO parallelize with OpenMP?
for (size_t ci = 0; ci < n_parameters; ++ci) {
mem_double = rhs[ci];
mem_double *= mem_double;
gradient_norm += mem_double;
}
gradient_norm = std::sqrt(gradient_norm) / n_parameters;
/* Evaluate the error before updating parameters */
prev_err = ef.eval();
}
/* H_new = H + lambda*I */
#ifdef WITH_PETSC
ierr = VecSet(lambda_vec, lambda); CHKERRXX(ierr);
// ierr = VecAssemblyBegin(lambda_vec); CHKERRXX(ierr);
// ierr = VecAssemblyEnd(lambda_vec); CHKERRXX(ierr);
ierr = MatDuplicate(H_PETSc, MAT_COPY_VALUES, &H_PETSc_new); CHKERRXX(ierr);
ierr = MatDiagonalSet(H_PETSc_new, lambda_vec, ADD_VALUES); CHKERRXX(ierr);
#else
H_new = H;
for( size_t i = 0; i < n_parameters; ++i ){
H_new.at( i, i ) += lambda;
}
#endif
/* Compute the update vector */
compute_time -= MPI_Wtime( );
#ifdef WITH_PETSC
ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRXX(ierr);
ierr = KSPSetOperators(ksp, H_PETSc_new, H_PETSc_new); CHKERRXX(ierr);
// ierr = KSPSetType(ksp, KSPPREONLY); CHKERRXX(ierr);
ierr = KSPSetFromOptions(ksp); CHKERRXX(ierr);
// ierr = KSPGetPC(ksp, &pc); CHKERRXX(ierr);
// ierr = PCSetType(pc, PCLU); CHKERRXX(ierr);
// ierr = PCFactorSetMatSolverType(pc,MATELEMENTAL); CHKERRXX(ierr);
ierr = KSPSolve(ksp,RHS_PETSc,RHS_PETSc); CHKERRXX(ierr);
#else
update = arma::solve(
H_new,
rhs
);
#endif
compute_time += MPI_Wtime( );
#ifdef WITH_PETSC
/* Copy distributed PETSc vector into a local one for every process and read the values */
ierr = VecScatterCreateToAll(RHS_PETSc, &scatter, &mpi_vec); CHKERRXX(ierr);
ierr = VecScatterBegin(scatter,RHS_PETSc, mpi_vec, INSERT_VALUES, SCATTER_FORWARD); CHKERRXX(ierr);
ierr = VecScatterEnd(scatter,RHS_PETSc, mpi_vec, INSERT_VALUES, SCATTER_FORWARD); CHKERRXX(ierr);
ierr = VecGetValues(mpi_vec, n_parameters, inds, all_vals); CHKERRXX(ierr);
#endif
/* Compute the error after update of parameters */
update_norm = 0.0;
for (size_t i = 0; i < n_parameters; i++) {
#ifdef WITH_PETSC
params_tmp->at(i) = params_current->at(i) + all_vals[i];
update_norm += all_vals[i] * all_vals[i];
#else
params_tmp->at(i) = params_current->at(i) + update.at(i);
update_norm += update.at(i) * update.at(i);
#endif
}
update_norm = std::sqrt(update_norm);
current_err = ef.eval(params_tmp.get());
/* Check, if the parameter update improved the function */
if (current_err < prev_err) {
/* If the error is lowered after parameter update, accept the new parameters and lower the damping
* factor lambda */
//TODO rewrite without division!
lambda /= this->p_impl->lambda_decrease;
for (size_t i = 0; i < n_parameters; i++) {
params_current->at(i) = params_tmp->at(i);
}
prev_err = current_err;
update_J = true;
/* If the convergence threshold is achieved, finish the computation */
if (current_err < this->p_impl->tolerance) {
break;
}
} else {
/* If the error after parameters update is not lower, increase the damping factor lambda */
update_J = false;
lambda *= this->p_impl->lambda_increase;
}
// COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
// << gradient_norm << ", Direction norm: " << update_norm << "\r");
if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
*ofs << "Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
<< gradient_norm << ", Direction norm: " << update_norm << std::endl;
}
} while (iter_counter < this->p_impl->maximum_niters);
total_time += MPI_Wtime( );
COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
<< gradient_norm << ", Direction norm: " << update_norm << std::endl);
if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
*ofs << "Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
<< gradient_norm << ", Direction norm: " << update_norm << std::endl;
}
/* Store the optimized parameters */
this->optimal_parameters = *params_current;
#ifdef WITH_PETSC
/* Dealloc PETSc variables */
ierr = MatDestroy(&H_PETSc); CHKERRXX(ierr);
ierr = VecDestroy(&RHS_PETSc); CHKERRXX(ierr);
ierr = VecDestroy(&mpi_vec); CHKERRXX(ierr);
ierr = VecDestroy(&lambda_vec); CHKERRXX(ierr);
ierr = VecScatterDestroy(&scatter); CHKERRXX(ierr);
ierr = KSPDestroy(&ksp); CHKERRXX(ierr);
ierr = PetscFree(inds); CHKERRXX(ierr);
ierr = PetscFree(all_vals); CHKERRXX(ierr);
// ierr = PetscViewerDestroy(&viewer); CHKERRXX(ierr);
#endif
/* Dealloc vector of parameters */
if (params_current) {
delete params_current;
params_current = nullptr;
}
ef.set_parameters(this->optimal_parameters);
// COUT_INFO("Finished in " << iter_counter << " iterations with error: " << current_err << ", gradient norm: " << gradient_norm << ", update norm: " << update_norm << std::endl);
COUT_INFO( "Finished in " << total_time << " [s], with " << update_counter << " updates of hessian, in " << iter_counter << " outer iterations, in " << ninner_iters << " inner iterations" );
COUT_INFO( " error " << current_err );
COUT_INFO( " update norm " << update_norm );
COUT_INFO( " calculation of Hessian " << 100*(update_time + update_sync_rhs + update_sync_H) / (update_time + update_sync_rhs + update_sync_H + compute_time + compute_sync) << " [%], time per one calculation " << (update_time + update_sync_rhs + update_sync_H) / update_counter << " [s]" );
COUT_INFO( " calculation of direction " << 100*(compute_time + compute_sync) / (update_time + update_sync_rhs + update_sync_H + compute_time + compute_sync) << " [%], time per one iteration " << (compute_time + compute_sync) / iter_counter << " [s]" );
}
LevenbergMarquardt::~LevenbergMarquardt() = default;
}