From 8cc559221f7154d10b961852f3574b22cd355ab5 Mon Sep 17 00:00:00 2001
From: kra568 <kra568@login3.smc.salomon.it4i.cz>
Date: Thu, 10 Oct 2019 19:16:44 +0200
Subject: [PATCH] [FIX] minor fixes

---
 run_test.sh                                |   2 +-
 src/DataSet/DataSet.cpp                    |   7 +-
 src/ErrorFunction/ErrorFunctions.cpp       |   7 +
 src/LearningMethods/LevenbergMarquardt.cpp | 297 +++++++++++++++++++--
 src/Network/NeuralNetwork.cpp              |  16 +-
 src/SymmetryFunction/SymmetryFunction.cpp  |  13 +
 src/message.h                              |   1 +
 7 files changed, 308 insertions(+), 35 deletions(-)

diff --git a/run_test.sh b/run_test.sh
index 35b0886b..f2bbac0f 100755
--- a/run_test.sh
+++ b/run_test.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 export OMP_NUM_THREADS=1
-./build_scripts/compile_salomon.sh
+#./build_scripts/compile_salomon.sh
 source ./build_scripts/load_salomon_modules.inc
 cd ./build/examples
 mpirun -np 1 ./dev_sandbox
diff --git a/src/DataSet/DataSet.cpp b/src/DataSet/DataSet.cpp
index 6655dd54..910c9663 100644
--- a/src/DataSet/DataSet.cpp
+++ b/src/DataSet/DataSet.cpp
@@ -158,7 +158,12 @@ namespace lib4neuro {
 
         this->n_elements = this->data.size();
         COUT_INFO("Shuffling finished!");
-        std::cout << "MPI[" << lib4neuro::mpi_rank << "] -> " << this->n_elements << " data entries" << std::endl;
+		for( int pi = 0; pi < lib4neuro::mpi_nranks; ++pi ){
+			if( lib4neuro::mpi_rank == pi ){
+				std::cout << "MPI[" << lib4neuro::mpi_rank << "] -> " << this->n_elements << " data entries" << std::endl;
+			}
+			MPI_Barrier( lib4neuro::mpi_active_comm );
+		}
     }
 
     void DataSet::MPI_gather_data_on_master( ){
diff --git a/src/ErrorFunction/ErrorFunctions.cpp b/src/ErrorFunction/ErrorFunctions.cpp
index d514aa6d..960668da 100644
--- a/src/ErrorFunction/ErrorFunctions.cpp
+++ b/src/ErrorFunction/ErrorFunctions.cpp
@@ -379,6 +379,13 @@ namespace lib4neuro {
         }
 
         double result = error / (this->rescale_error?n_elements:1.0);
+		// for( int pi = 0; pi < lib4neuro::mpi_nranks; ++pi ){
+			// if( lib4neuro::mpi_rank == pi ){
+				// std::cout <<std::setprecision(6) << "[" << pi << "]eval: " << result << std::endl;
+			// }
+			// MPI_Barrier( lib4neuro::mpi_active_comm );
+		// }
+		
 
         if (verbose) {
             COUT_DEBUG("MSE = " << result << std::endl);
diff --git a/src/LearningMethods/LevenbergMarquardt.cpp b/src/LearningMethods/LevenbergMarquardt.cpp
index 7d35d8e8..1864bb82 100644
--- a/src/LearningMethods/LevenbergMarquardt.cpp
+++ b/src/LearningMethods/LevenbergMarquardt.cpp
@@ -42,7 +42,213 @@ struct lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl {
                               arma::Mat<double>& H,
                               arma::Col<double>& rhs,
                               size_t data_subset_size);
+
+	size_t solve_system_CG(
+		const arma::Mat<double>& A,
+		const arma::Col<double>& rhs,
+		arma::Col<double>& sol
+	);
 };
+//*****************************************************************
+// Iterative template routine -- GMRES
+//
+// GMRES solves the unsymmetric linear system Ax = b using the
+// Generalized Minimum Residual method
+//
+// GMRES follows the algorithm described on p. 20 of the
+// SIAM Templates book.
+//
+// The return value indicates convergence within max_iter (input)
+// iterations (0), or no convergence within max_iter iterations (1).
+//
+// Upon successful return, output arguments have the following values:
+//
+//        x  --  approximate solution to Ax = b
+// max_iter  --  the number of iterations performed before the
+//               tolerance was reached
+//      tol  --  the residual after the final iteration
+//
+//*****************************************************************
+
+
+//template < class Matrix, class Vector >
+//void
+//Update(Vector &x, int k, Matrix &h, Vector &s, Vector v[])
+//{
+//  Vector y(s);
+//
+//  // Backsolve:
+//  for (int i = k; i >= 0; i--) {
+//    y(i) /= h(i,i);
+//    for (int j = i - 1; j >= 0; j--)
+//      y(j) -= h(j,i) * y(i);
+//  }
+//
+//  for (int j = 0; j <= k; j++)
+//    x += v[j] * y(j);
+//}
+//
+//
+//template < class Real >
+//Real
+//abs(Real x)
+//{
+//  return (x > 0 ? x : -x);
+//}
+//
+//
+//template < class Operator, class Vector, class Preconditioner,
+//           class Matrix, class Real >
+//int
+//GMRES(const Operator &A, Vector &x, const Vector &b,
+//      const Preconditioner &M, Matrix &H, int &m, int &max_iter,
+//      Real &tol)
+//{
+//  Real resid;
+//  int i, j = 1, k;
+//  Vector s(m+1), cs(m+1), sn(m+1), w;
+//
+//  Real normb = norm(M.solve(b));
+//  Vector r = M.solve(b - A * x);
+//  Real beta = norm(r);
+//
+//  if (normb == 0.0)
+//    normb = 1;
+//
+//  if ((resid = norm(r) / normb) <= tol) {
+//    tol = resid;
+//    max_iter = 0;
+//    return 0;
+//  }
+//
+//  Vector *v = new Vector[m+1];
+//
+//  while (j <= max_iter) {
+//    v[0] = r * (1.0 / beta);    // ??? r / beta
+//    s = 0.0;
+//    s(0) = beta;
+//
+//    for (i = 0; i < m && j <= max_iter; i++, j++) {
+//      w = M.solve(A * v[i]);
+//      for (k = 0; k <= i; k++) {
+//        H(k, i) = dot(w, v[k]);
+//        w -= H(k, i) * v[k];
+//      }
+//      H(i+1, i) = norm(w);
+//      v[i+1] = w * (1.0 / H(i+1, i)); // ??? w / H(i+1, i)
+//
+//      for (k = 0; k < i; k++)
+//        ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
+//
+//      GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
+//      ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
+//      ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
+//
+//      if ((resid = abs(s(i+1)) / normb) < tol) {
+//        Update(x, i, H, s, v);
+//        tol = resid;
+//        max_iter = j;
+//        delete [] v;
+//        return 0;
+//      }
+//    }
+//    Update(x, m - 1, H, s, v);
+//    r = M.solve(b - A * x);
+//    beta = norm(r);
+//    if ((resid = beta / normb) < tol) {
+//      tol = resid;
+//      max_iter = j;
+//      delete [] v;
+//      return 0;
+//    }
+//  }
+//
+//  tol = resid;
+//  delete [] v;
+//  return 1;
+//}
+//
+//
+//#include <math.h>
+//
+//
+//template<class Real>
+//void GeneratePlaneRotation(Real &dx, Real &dy, Real &cs, Real &sn)
+//{
+//  if (dy == 0.0) {
+//    cs = 1.0;
+//    sn = 0.0;
+//  } else if (abs(dy) > abs(dx)) {
+//    Real temp = dx / dy;
+//    sn = 1.0 / sqrt( 1.0 + temp*temp );
+//    cs = temp * sn;
+//  } else {
+//    Real temp = dy / dx;
+//    cs = 1.0 / sqrt( 1.0 + temp*temp );
+//    sn = temp * cs;
+//  }
+//}
+//
+//
+//template<class Real>
+//void ApplyPlaneRotation(Real &dx, Real &dy, Real &cs, Real &sn)
+//{
+//  Real temp  =  cs * dx + sn * dy;
+//  dy = -sn * dx + cs * dy;
+//  dx = temp;
+//}
+
+
+size_t lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::solve_system_CG(
+		const arma::Mat<double>& A,
+		const arma::Col<double>& rhs,
+		arma::Col<double>& sol
+	){
+
+		const double zero = 1e-32;
+		const double prec = 1e-6;
+		const auto n = rhs.n_elem;
+		size_t iter = 1;
+		if( sol.n_elem != n ){
+			sol.resize( n );
+		}
+
+
+		arma::Col<double> r( n );
+		r = rhs - A * sol;
+
+		double norm_r = arma::dot(r, r);
+		double norm_r_prev;
+		if ( norm_r < zero ) {
+		  return 0;
+		}
+		arma::Col<double> p( n );
+		p = r;
+
+        arma::Col<double> Ap( n );
+		double alpha, beta;
+		while( true ){
+            ++iter;
+            norm_r_prev = norm_r;
+
+            Ap = A * p;
+            alpha = norm_r_prev / arma::dot(p, Ap);
+            sol += alpha * p;
+
+            r -= alpha * Ap;
+            norm_r = arma::dot(r, r);
+            if ( norm_r < zero || iter >= n) {
+              break;
+            }
+
+            beta = norm_r / norm_r_prev;
+            p = r + beta * p;
+		}
+
+
+
+		return iter;
+  }
 
 void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_jacobian_and_rhs(
     lib4neuro::ErrorFunction& ef,
@@ -58,7 +264,7 @@ void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_jacobian_and_rhs
     if (data_subset_size < ef.get_n_data_set()) {
         ef.divide_data_train_test((double) data_subset_size / (double) ef.get_n_data_set());
     }
-	
+
     size_t n_parameters = ef.get_dimension( );
 	if( H.n_rows != n_parameters || H.n_cols != n_parameters ){
 		H.reshape( n_parameters, n_parameters );
@@ -68,7 +274,7 @@ void lib4neuro::LevenbergMarquardt::LevenbergMarquardtImpl::get_jacobian_and_rhs
 	}
 	H.fill(0.0);
 	rhs.fill(0.0);
-	
+
 	for( size_t entry_idx = 0; entry_idx  < ef.get_n_data_set( ); ++entry_idx ){
 		ef.add_to_hessian_and_rhs_single( H, rhs, entry_idx );
 	}
@@ -110,13 +316,12 @@ namespace lib4neuro {
     void LevenbergMarquardt::optimize(lib4neuro::ErrorFunction& ef,
                                       lib4neuro::LM_UPDATE_TYPE update_type,
                                       std::ofstream* ofs) {
-        double current_err = ef.eval();
+        double current_err = ef.eval( );
 
         COUT_INFO(
-            "Finding a solution via the Levenberg-Marquardt method... Starting error: " << current_err << std::endl);
+            "Finding a solution via the Levenberg-Marquardt method... Starting error: " << current_err);
         if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
-            *ofs << "Finding a solution via a Levenberg-Marquardt method... Starting error: " << current_err
-                 << std::endl;
+            *ofs << "Finding a solution via a Levenberg-Marquardt method... Starting error: " << current_err;
         }
 
         size_t n_parameters  = ef.get_dimension();
@@ -147,20 +352,52 @@ namespace lib4neuro {
         // Solver iterations //
         //-------------------//
         size_t iter_counter   = 0;
+		size_t update_counter = 0;
+
+		double update_time = 0.0;
+		double update_sync_rhs = 0.0;
+		double update_sync_H = 0.0;
+		double compute_time = 0.0;
+		double compute_sync = 0.0;
+		double total_time = -MPI_Wtime( );
+		size_t ninner_iters = 0;
+
         do {
+			iter_counter++;
 //			COUT_INFO("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
 //                                     << gradient_norm << ", Direction norm: " << update_norm << "\r");
 
             if (update_J) {
+				update_counter++;
                 /* Get Jacobian matrix */
+				update_time -= MPI_Wtime( );
                 this->p_impl->get_jacobian_and_rhs(ef,
                                                    H,
                                                    rhs,
                                                    this->p_impl->batch_size);
-
+				update_time += MPI_Wtime( );
+				update_sync_rhs -= MPI_Wtime( );
+				MPI_Allreduce(
+					MPI_IN_PLACE,
+					rhs.begin(),
+					rhs.n_elem,
+					MPI_DOUBLE,
+					MPI_SUM,
+					lib4neuro::mpi_active_comm
+				);
+				update_sync_rhs += MPI_Wtime( );
+				update_sync_H -= MPI_Wtime( );
+				MPI_Allreduce(
+					MPI_IN_PLACE,
+					H.begin(),
+					H.n_elem,
+					MPI_DOUBLE,
+					MPI_SUM,
+					lib4neuro::mpi_active_comm
+				);
+				update_sync_H += MPI_Wtime( );
 
                 gradient_norm = 0;
-
                 // TODO parallelize with OpenMP?
                 for (size_t ci = 0; ci < n_parameters; ++ci) {
                     mem_double = rhs[ci];
@@ -169,16 +406,6 @@ namespace lib4neuro {
                 }
                 gradient_norm  = std::sqrt(gradient_norm) / n_parameters;
 
-                jacobian_norm = 0;
-                for (size_t ri = 0; ri < n_parameters; ++ri) {
-                    for (size_t ci = 0; ci < n_parameters; ++ci) {
-                        jacobian_norm += H.at(ri,
-                                              ci) * H.at(ri,
-                                                         ci);
-                    }
-                }
-                jacobian_norm  = std::sqrt(jacobian_norm);
-
                 /* Evaluate the error before updating parameters */
                 prev_err = ef.eval();
             }
@@ -192,12 +419,22 @@ namespace lib4neuro {
 
 
             /* Compute the update vector */
-            update = arma::solve(H_new,
-                                 rhs);
-            MPI_Allreduce( MPI_IN_PLACE, &update[0], update.size(), MPI_DOUBLE, MPI_SUM, lib4neuro::mpi_active_comm );
-            for( size_t ii = 0; ii < update.size(); ++ii ){
-                update.at( ii ) /= lib4neuro::mpi_nranks;
-            }
+            // update = arma::solve(
+				// H_new,
+				// rhs,
+				// arma::solve_opts::flag_fast + arma::solve_opts::flag_allow_ugly + arma::solve_opts::flag_no_band
+			// );
+			compute_time -= MPI_Wtime( );
+             update = arma::solve(
+				 H_new,
+				 rhs
+			 );
+//			ninner_iters += this->p_impl->solve_system_CG( H, rhs, update );
+			compute_time += MPI_Wtime( );
+
+			// compute_sync -= MPI_Wtime( );
+            // MPI_Allreduce( MPI_IN_PLACE, &update[0], update.size(), MPI_DOUBLE, MPI_SUM, lib4neuro::mpi_active_comm );
+			// compute_sync += MPI_Wtime( );
 
             /* Compute the error after update of parameters */
             update_norm = 0.0;
@@ -242,7 +479,10 @@ namespace lib4neuro {
                 *ofs << "Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
                      << gradient_norm << ", Direction norm: " << update_norm << std::endl;
             }
-        } while (iter_counter++ < this->p_impl->maximum_niters && (update_norm > this->p_impl->tolerance_parameters));
+        } while (iter_counter < this->p_impl->maximum_niters);
+		total_time += MPI_Wtime( );
+
+
         COUT_DEBUG("Iteration: " << iter_counter << " Current error: " << current_err << ", Current gradient norm: "
                                  << gradient_norm << ", Direction norm: " << update_norm << std::endl);
         if (ofs && ofs->is_open() && lib4neuro::mpi_rank == 0) {
@@ -261,7 +501,12 @@ namespace lib4neuro {
 
         ef.set_parameters(this->optimal_parameters);
 
-		COUT_INFO("Finished in " << iter_counter << " iterations with error: " << current_err << " and gradient norm: " << gradient_norm << " and update norm: " << update_norm << std::endl);
+		// COUT_INFO("Finished in " << iter_counter << " iterations with error: " << current_err << ", gradient norm: " << gradient_norm << ", update norm: " << update_norm << std::endl);
+		COUT_INFO( "Finished in " << total_time << " [s], with " << update_counter << " updates of hessian, in " << iter_counter << " outer iterations, in " << ninner_iters << " inner iterations" );
+		COUT_INFO( " error " << current_err );
+		COUT_INFO( " update norm " << update_norm );
+		COUT_INFO( " calculation of Hessian  " << 100*(update_time + update_sync_rhs + update_sync_H) / (update_time + update_sync_rhs + update_sync_H + compute_time + compute_sync) << " [%], time per one calculation " << (update_time + update_sync_rhs + update_sync_H) / update_counter << " [s]" );
+		COUT_INFO( " calculation of direction " << 100*(compute_time + compute_sync) / (update_time + update_sync_rhs + update_sync_H + compute_time + compute_sync) << " [%], time per one iteration " << (compute_time + compute_sync) / iter_counter << " [s]" );
     }
 
     LevenbergMarquardt::~LevenbergMarquardt() = default;
diff --git a/src/Network/NeuralNetwork.cpp b/src/Network/NeuralNetwork.cpp
index 97830af8..044e61dd 100644
--- a/src/Network/NeuralNetwork.cpp
+++ b/src/Network/NeuralNetwork.cpp
@@ -85,7 +85,7 @@ namespace lib4neuro {
         if (this->output_neuron_indices.size() != output.size()) {
             THROW_INVALID_ARGUMENT_ERROR("Data output size != Network output size");
         }
-		
+
 		lib4neuro::network_evaluation_counter++;
 
         double potential, bias;
@@ -274,9 +274,9 @@ namespace lib4neuro {
         if (this->output_neuron_indices.size() != output.size()) {
             THROW_INVALID_ARGUMENT_ERROR("Data output size != Network output size");
         }
-		
+
 		lib4neuro::network_evaluation_counter++;
-        
+
 		double potential, bias;
         int    bias_idx;
 
@@ -360,7 +360,7 @@ namespace lib4neuro {
             neuron_idx = current_layer->at(i);
             scaling_backprog[neuron_idx] = error_derivative[i] * error_scaling;
         }
-		
+
         /* we iterate through all the layers in reverse order and calculate partial derivatives scaled correspondingly */
         for (size_t j = this->neuron_layers_feedforward.size(); j > 0; --j) {
 
@@ -415,7 +415,7 @@ namespace lib4neuro {
                                                      ::std::vector<double>& gradient) {
 
 		lib4neuro::network_backpropagation_counter++;
-		
+
         ::std::vector<double> scaling_backprog(this->get_n_neurons());
         ::std::fill(scaling_backprog.begin(),
                     scaling_backprog.end(),
@@ -501,7 +501,8 @@ namespace lib4neuro {
     void NeuralNetwork::randomize_weights() {
 
         if( lib4neuro::mpi_rank == 0 ){
-            boost::random::mt19937 gen(std::time(0));
+             boost::random::mt19937 gen(std::time(0));
+//            boost::random::mt19937 gen(0);
 
             // Init weight guess ("optimal" for logistic activation functions)
             double r = 4 * sqrt(6. / (this->connection_weights.size()));
@@ -525,7 +526,8 @@ namespace lib4neuro {
     void NeuralNetwork::randomize_biases() {
 
         if( lib4neuro::mpi_rank == 0 ){
-            boost::random::mt19937 gen(std::time(0));
+             boost::random::mt19937 gen(std::time(0));
+//            boost::random::mt19937 gen(0);
 
             // Init weight guess ("optimal" for logistic activation functions)
             boost::random::uniform_real_distribution<> dist(-1,
diff --git a/src/SymmetryFunction/SymmetryFunction.cpp b/src/SymmetryFunction/SymmetryFunction.cpp
index 33e60cf2..d8fbfb47 100644
--- a/src/SymmetryFunction/SymmetryFunction.cpp
+++ b/src/SymmetryFunction/SymmetryFunction.cpp
@@ -6,6 +6,8 @@
 #include <cmath>
 
 #include "../exceptions.h"
+#include "../message.h"
+
 #include "SymmetryFunction.h"
 #include "SymmetryFunctionSerialization.h"
 
@@ -145,17 +147,28 @@ double lib4neuro::G2::eval(unsigned int particle_ind,
     double extension = this->parameters.at(SYMMETRY_FUNCTION_PARAMETER::EXTENSION);
     double shift = this->parameters.at(SYMMETRY_FUNCTION_PARAMETER::SHIFT);
 
+//    std::cout << "particle " << particle_ind << "(";
+//    for( auto el: cartesian_coord.at(particle_ind).second ){
+//        std::cout << el << " ";
+//    }
+//    std::cout << ")";
     for(size_t i = 0; i < cartesian_coord.size(); i++) {
         if(i == particle_ind) {
             continue;
         }
+//        std::cout << " comparing with particle " << i << "(";
+//        for( auto el: cartesian_coord.at(i).second ){
+//            std::cout << el << " ";
+//        }
 
         /* Compute Euclid distance of i-th particle with the others */
         distance = this->euclid_distance(cartesian_coord.at(particle_ind).second, cartesian_coord.at(i).second);
+//        std::cout << ") distance = " << distance << " partial contribution " << std::exp(-extension * (distance - shift) * (distance - shift)) * this->cutoff_func->eval(distance) << std::endl;
 
         /* Call cutoff function */
         sum +=  std::exp(-extension * (distance - shift) * (distance - shift)) * this->cutoff_func->eval(distance);
     }
+//    COUT_INFO( "result " << sum);
 
     return sum;
 }
diff --git a/src/message.h b/src/message.h
index d292ebf5..9535ba4a 100644
--- a/src/message.h
+++ b/src/message.h
@@ -6,6 +6,7 @@
 
 #include <cassert>
 #include <iomanip>
+#include <iostream>
 
 #define COL_WIDTH 20
 #define R_ALIGN std::setw(COL_WIDTH) << std::right
-- 
GitLab