Commit 06efdf51 authored by kra568's avatar kra568
Browse files

[FIX] fixed the manner in which the serialization of dataset normalization is used

parent 456b3f8d
......@@ -87,6 +87,9 @@ double optimize_via_LBMQ(l4n::NeuralNetwork& net,
unsigned int nb,
double tol
) {
if( niters == 0 ){
return ef.eval(nullptr);
}
size_t max_iterations = niters;
size_t batch_size = nb;
......@@ -174,9 +177,8 @@ void dynamic_test(
unsigned int batch_size,
double tol,
std::vector<unsigned int> net_complx,
const std::string& src_file,
const std::string& src_net_file,
const std::string& tgt_net_file,
const std::string& data_file,
const std::string& net_file,
const std::vector<double> &g2_cutoff_coefficients,
const std::vector<double> &g2_extensions,
const std::vector<double> &g2_shifts,
......@@ -186,9 +188,11 @@ void dynamic_test(
const std::vector<double> &g5_angles,
unsigned int cross_validation_k,
unsigned int cross_validation_ntests,
const std::string& cross_validation_file
const std::string& cross_validation_file,
const std::string& norm_file
){
try {
/******************************** CREATING A SYSTEM OF RADIAL TRANS. FUNCTIONS ***************************/
/* Specify cutoff functions */
std::vector<l4n::CutoffFunction2> g2_cutofffunctions;
for (auto el: g2_cutoff_coefficients) {
......@@ -230,9 +234,11 @@ void dynamic_test(
helium_sym_funcs);
std::unordered_map<l4n::ELEMENT_SYMBOL, l4n::Element*> elements;
elements[l4n::ELEMENT_SYMBOL::He] = &helium;
/**************************** FINISHED CREATING A SYSTEM OF RADIAL TRANS. FUNCTIONS ************************/
/**************************** READING COORDINATE DATA FILE ************************/
/* Read data */
l4n::XYZReader reader(src_file,
l4n::XYZReader reader(data_file,
true);
reader.read();
......@@ -243,34 +249,45 @@ void dynamic_test(
std::shared_ptr<l4n::DataSet> ds = reader.get_acsf_data_set(elements);
// ds->print_data();
/* Create a neural network */
std::unordered_map<l4n::ELEMENT_SYMBOL, std::vector<unsigned int>> n_hidden_neurons;
n_hidden_neurons[l4n::ELEMENT_SYMBOL::He] = net_complx;
std::unordered_map<l4n::ELEMENT_SYMBOL, std::vector<l4n::NEURON_TYPE>> type_hidden_neurons;
for (auto el :n_hidden_neurons) {
for (auto i = 0; i < el.second.size() - 1; ++i) {
type_hidden_neurons[el.first].push_back(l4n::NEURON_TYPE::LOGISTIC);
}
type_hidden_neurons[el.first].push_back(l4n::NEURON_TYPE::LINEAR);
std::shared_ptr<l4n::NormalizationStrategyACSF> ns;
try {
ns = std::make_shared<l4n::NormalizationStrategyACSF>(norm_file, *reader.get_element_list());
}
auto ns = std::make_shared<l4n::NormalizationStrategyACSF>(l4n::NormalizationStrategyACSF(elements,
catch (const std::exception& e) {
if (lib4neuro::mpi_rank == 0) {
std::cerr << e.what() << std::endl;
}
ns = std::make_shared<l4n::NormalizationStrategyACSF>(l4n::NormalizationStrategyACSF(elements,
*reader.get_element_list(),
*ds->get_data()));
}
ds->set_normalization_strategy(ns);
ds->normalize( );
/**************************** FINISHED READING COORDINATE DATA FILE ************************/
/**************************** READING NETWORK FILE ************************/
l4n::ACSFNeuralNetwork* net;
try {
net = new l4n::ACSFNeuralNetwork(src_net_file, *reader.get_element_list());
net = new l4n::ACSFNeuralNetwork(net_file, *reader.get_element_list());
// net.load_structure_from_file( src_net_file );
}
catch (const std::exception& e) {
if (lib4neuro::mpi_rank == 0) {
std::cerr << e.what() << std::endl;
}
/* Create a neural network */
std::unordered_map<l4n::ELEMENT_SYMBOL, std::vector<unsigned int>> n_hidden_neurons;
n_hidden_neurons[l4n::ELEMENT_SYMBOL::He] = net_complx;
std::unordered_map<l4n::ELEMENT_SYMBOL, std::vector<l4n::NEURON_TYPE>> type_hidden_neurons;
for (auto el :n_hidden_neurons) {
for (auto i = 0; i < el.second.size() - 1; ++i) {
type_hidden_neurons[el.first].push_back(l4n::NEURON_TYPE::LOGISTIC);
}
type_hidden_neurons[el.first].push_back(l4n::NEURON_TYPE::LINEAR);
}
net = new l4n::ACSFNeuralNetwork(elements,
*reader.get_element_list(),
reader.contains_charge() && !reader.get_ignore_charge(),
......@@ -278,7 +295,7 @@ void dynamic_test(
type_hidden_neurons);
net->randomize_parameters();
}
// net->print_structure();
/**************************** FINISHED READING NETWORK FILE ************************/
l4n::MSE mse(net,
ds.get(),
......@@ -336,7 +353,8 @@ void dynamic_test(
std::cout << "maximal absolute error: " << max_error_abs << std::endl;
std::cout << "maximal relative error: " << 100 * max_error_rel << " %" << std::endl;
std::cout << "*****************************************************" << std::endl;
net->save_text(tgt_net_file);
net->save_text( net_file );
ns->save_text( norm_file );
}
// std::sort(out_dat.begin(), out_dat.end());
......@@ -392,17 +410,16 @@ void dynamic_test(
int main( int argc, char** argv ) {
MPI_INIT
std::cout << "2" << std::endl;
unsigned int niters = 1000;
unsigned int batch_size = 0;
unsigned int cross_validation_k = 1;
unsigned int cross_validation_ntests = 0;
double tol = 1e-1;
std::string src_file = "/mem.txt";
std::string src_net_file = "";
std::string tgt_net_file = "/mem.txt";
std::string data_file = "";
std::string net_file = "";
std::string cross_validation_file = "";
std::string norm_file = "";
unsigned int nlayers = 0;
std::vector<unsigned int> net_complx = {2, 1};
......@@ -424,12 +441,12 @@ int main( int argc, char** argv ) {
id++;
if( argc > id ){
src_file = argv[id];
data_file = argv[id];
}
id++;
if( argc > id ){
tgt_net_file = argv[id];
net_file = argv[id];
}
id++;
......@@ -500,11 +517,6 @@ int main( int argc, char** argv ) {
}
id += ng5;
if( argc > id ){
src_net_file = argv[id];
}
id++;
if( argc > id ){
cross_validation_k = atoi( argv[id] );
}
......@@ -520,6 +532,11 @@ int main( int argc, char** argv ) {
}
id++;
if(argc > id){
norm_file = argv[ id ];
}
id++;
nlayers = net_complx.size();
ng2 = g2_extensions.size();
ng5 = g5_extensions.size();
......@@ -550,9 +567,9 @@ int main( int argc, char** argv ) {
}
}
std::cout << "***********************************************************" << std::endl;
std::cout << "Data file: " << src_file << std::endl;
std::cout << "Network will be loaded from: " << src_net_file << std::endl;
std::cout << "Network will be saved in: " << tgt_net_file << std::endl;
std::cout << "Data file: " << data_file << std::endl;
std::cout << "Data normalization file: " << norm_file << std::endl;
std::cout << "Network will be loaded from & saved to: " << net_file << std::endl;
std::cout << "***********************************************************" << std::endl;
std::cout << "Cross validation file: " << cross_validation_file << std::endl;
......@@ -569,9 +586,8 @@ int main( int argc, char** argv ) {
batch_size,
tol,
net_complx,
src_file,
src_net_file,
tgt_net_file,
data_file,
net_file,
g2_cutoff_coefficients,
g2_extensions,
g2_shifts,
......@@ -581,7 +597,8 @@ int main( int argc, char** argv ) {
g5_angles,
cross_validation_k,
cross_validation_ntests,
cross_validation_file
cross_validation_file,
norm_file
);
} catch (const std::exception& e) {
......
......@@ -8,41 +8,50 @@
#include "NormalizationStrategyACSFSerialization.h"
#include "exceptions.h"
/**
 * Builds a normalization strategy from a previously serialized text archive.
 *
 * The archived state is read via init_from_file(); the element order is not
 * taken from the file but from the caller-supplied argument.
 *
 * @param filepath      Path to the serialized normalization strategy.
 * @param element_order Order of elements to store in order_of_elements.
 */
lib4neuro::NormalizationStrategyACSF::NormalizationStrategyACSF(
    const std::string &filepath,
    const std::vector<ELEMENT_SYMBOL> &element_order
) {
    /* Load the archived state first, then record the element order given by the caller */
    init_from_file(filepath);
    order_of_elements = element_order;
}
lib4neuro::NormalizationStrategyACSF::NormalizationStrategyACSF(
const std::unordered_map<ELEMENT_SYMBOL, Element*>& element_description,
const std::vector<ELEMENT_SYMBOL> &element_order,
const std::vector<std::pair<std::vector<double>, std::vector<double>>> &data) {
const std::vector<std::pair<std::vector<double>, std::vector<double>>> &data
) {
this->order_of_elements = element_order;
this->outputs_min = std::numeric_limits<double>::max();
this->outputs_max = std::numeric_limits<double>::min();
for( auto el: element_description ){
this->number_of_inputs_per_element[ el.first ] = el.second->getSymmetryFunctions()->size();
this->inputs_min[el.first] = std::vector<double>(this->number_of_inputs_per_element[ el.first ]);
this->inputs_max[el.first] = std::vector<double>(this->number_of_inputs_per_element[ el.first ]);
std::fill(this->inputs_min[el.first].begin(), inputs_min[el.first].end(), std::numeric_limits<double>::max());
std::fill(this->inputs_max[el.first].begin(), inputs_max[el.first].end(), std::numeric_limits<double>::min());
}
unsigned int first_input_idx;
for( auto d: data ){
first_input_idx = 0;
for( auto el: this->order_of_elements ){
for( unsigned int j = 0; j < this->number_of_inputs_per_element[ el ]; ++j ){
this->inputs_min[el][ j ] = std::min(this->inputs_min[el][ j ], d.first[ j + first_input_idx ] );
this->inputs_max[el][ j ] = std::max(this->inputs_max[el][ j ], d.first[ j + first_input_idx ] );
}
first_input_idx += this->number_of_inputs_per_element[ el ];
}
this->outputs_min = std::min( this->outputs_min, d.second[ 0 ] );
this->outputs_max = std::max( this->outputs_max, d.second[ 0 ] );
}
/* synchronization of normalized data */
MPI_Allreduce( MPI_IN_PLACE, &this->outputs_min, 1, MPI_DOUBLE, MPI_MIN, lib4neuro::mpi_active_comm );
MPI_Allreduce( MPI_IN_PLACE, &this->outputs_max, 1, MPI_DOUBLE, MPI_MAX, lib4neuro::mpi_active_comm );
......@@ -67,7 +76,7 @@ lib4neuro::NormalizationStrategyACSF::NormalizationStrategyACSF(const std::vecto
}
void lib4neuro::NormalizationStrategyACSF::normalize_input(std::vector<double>& inp){
unsigned int first_input_idx = 0;
double len, dist2min;
......@@ -100,17 +109,60 @@ void lib4neuro::NormalizationStrategyACSF::de_normalize_input(std::vector<double
/**
 * Rescales the first output value in place, mapping the interval
 * [outputs_min, outputs_max] linearly onto [-1, 1].
 *
 * Only out[0] is read and written.
 *
 * NOTE(review): when outputs_max == outputs_min the division below is by zero;
 * confirm that callers guarantee a non-degenerate output range.
 *
 * @param out Output vector whose first element is normalized.
 */
void lib4neuro::NormalizationStrategyACSF::normalize_output(std::vector<double> &out){
    const double range_length = this->outputs_max - this->outputs_min;
    const double offset_from_min = out[ 0 ] - this->outputs_min;

    /* fraction of the range in [0, 1], stretched to [-1, 1] */
    out[ 0 ] = 2*(offset_from_min/range_length) - 1.0;
}
/**
 * Inverse of the output normalization: maps the first output value in place
 * from [-1, 1] linearly back onto [outputs_min, outputs_max].
 *
 * Only out[0] is read and written.
 *
 * @param out Output vector whose first element is de-normalized.
 */
void lib4neuro::NormalizationStrategyACSF::de_normalize_output(std::vector<double> &out){
    const double range_length = this->outputs_max - this->outputs_min;
    const double shifted = out[ 0 ] + 1.0;

    /* [-1, 1] -> [0, 2] -> [0, range_length] -> [outputs_min, outputs_max] */
    out[ 0 ] = shifted * range_length * 0.5 + this->outputs_min;
}
/**
 * Serializes this normalization strategy into a Boost text archive written
 * to the given file.
 *
 * Only the process with lib4neuro::mpi_rank == 0 writes the file; every other
 * rank returns immediately without touching the filesystem.
 *
 * @param filepath Path of the file the archive is written to.
 */
void lib4neuro::NormalizationStrategyACSF::save_text( const std::string &filepath) {
    if( lib4neuro::mpi_rank > 0 ){
        return;
    }

    ::std::ofstream ofs(filepath);
    {
        /* The archive must be destroyed BEFORE the stream is closed: its destructor
         * still writes to the stream and restores the stream state. The inner scope
         * exists only to end the archive's lifetime ahead of ofs.close(). */
        boost::archive::text_oarchive oa(ofs);
        oa << *this;
    }
    ofs.close();
}
void lib4neuro::NormalizationStrategyACSF::init_from_file(const std::string &filepath) {
for( int i = 0; i < lib4neuro::mpi_nranks; ++i ){
if( i == lib4neuro::mpi_rank ){
::std::ifstream ifs(filepath);
if (ifs.is_open()) {
try {
boost::archive::text_iarchive ia(ifs);
ia >> *this;
}
catch (boost::archive::archive_exception& e) {
MPI_INTERRUPT
THROW_RUNTIME_ERROR(
"Serialized archive error: '" + e.what() + "'! Please, check if your file is really "
"the serialized NeuralNetwork.");
}
ifs.close();
} else {
MPI_INTERRUPT
THROW_RUNTIME_ERROR("File '" + filepath + "' couldn't be open!");
}
}
MPI_ERROR_CHECK
if(lib4neuro::mpi_msg){
THROW_RUNTIME_ERROR("File '" + filepath + "' couldn't be open!");
}
MPI_Barrier(lib4neuro::mpi_active_comm);
}
}
......@@ -21,6 +21,7 @@ namespace lib4neuro {
*/
std::unordered_map<ELEMENT_SYMBOL, std::vector<double> > inputs_min;
std::unordered_map<ELEMENT_SYMBOL, std::vector<double> > inputs_max;
double outputs_max, outputs_min;
/**
* information about the various elements
......@@ -30,6 +31,8 @@ namespace lib4neuro {
std::vector<ELEMENT_SYMBOL> order_of_elements;
void init_from_file(const std::string &fn);
public:
/**
......@@ -37,6 +40,11 @@ namespace lib4neuro {
*/
struct access;
NormalizationStrategyACSF(
const std::string &filepath,
const std::vector<ELEMENT_SYMBOL>& element_order
);
NormalizationStrategyACSF(
const std::unordered_map<ELEMENT_SYMBOL, Element*>& element_description,
const std::vector<ELEMENT_SYMBOL>& element_order,
......@@ -62,6 +70,9 @@ namespace lib4neuro {
void de_normalize_output(std::vector<double>& out) override;
LIB4NEURO_API void save_text( const std::string &filepath );
};
}//end of namespace lib4neuro
......
......@@ -14,76 +14,46 @@
#include <boost/serialization/unordered_map.hpp>
#include "NormalizationStrategyACSF.h"
#include "NormalizationStrategySerialization.h"
//#include "NormalizationStrategySerialization.h"
struct lib4neuro::NormalizationStrategyACSF::access {
template<class Archive>
static void save_construct_data(
Archive & ar, const lib4neuro::NormalizationStrategyACSF* ns, const unsigned long int file_version) {
//TODO check if it's possible to call base_object in this function
ar & boost::serialization::base_object<NormalizationStrategy>(ns);
// ar << ns->max_min_inp_val;
ar << ns->inputs_min;
ar << ns->inputs_max;
ar << ns->outputs_min;
ar << ns->outputs_max;
ar << ns->number_of_inputs_per_element;
ar << ns->order_of_elements;
}
template<class Archive>
static void load_construct_data(Archive & ar,
lib4neuro::NormalizationStrategyACSF* ns,
const unsigned long int file_version) {
//TODO check if it's possible to call base_object in this function
ar & boost::serialization::base_object<NormalizationStrategy>(ns);
// std::vector<double> max_min_inp_val;
std::unordered_map<ELEMENT_SYMBOL, std::vector<double>> inputs_min;
std::unordered_map<ELEMENT_SYMBOL, std::vector<double>> inputs_max;
double outputs_min;
double outputs_max;
std::unordered_map<ELEMENT_SYMBOL, unsigned int> number_of_inputs_per_element;
std::vector<ELEMENT_SYMBOL> element_order;
// ar >> max_min_inp_val;
ar >> inputs_min;
ar >> inputs_max;
ar >> outputs_min;
ar >> outputs_max;
ar >> number_of_inputs_per_element;
ar >> element_order;
::new(ns)lib4neuro::NormalizationStrategyACSF(element_order,
inputs_min,
inputs_max,
number_of_inputs_per_element,
outputs_min,
outputs_max);
}
};
namespace boost::serialization {
namespace lib4neuro {
struct NormalizationStrategyACSF::access {
template<class Archive>
inline void load_construct_data(Archive & ar,
lib4neuro::NormalizationStrategyACSF* ns,
const unsigned long int file_version) {
lib4neuro::NormalizationStrategyACSF::access::load_construct_data(ar, ns, file_version);
static void serialize(Archive& ar,
NormalizationStrategyACSF& ns,
const unsigned int version) {
ar & ns.inputs_min;
ar & ns.inputs_max;
ar & ns.outputs_max;
ar & ns.outputs_min;
ar & ns.number_of_inputs_per_element;
}
};
template<class Archive>
inline void save_construct_data(Archive & ar,
const lib4neuro::NormalizationStrategyACSF* ns,
const unsigned long int file_version) {
lib4neuro::NormalizationStrategyACSF::access::save_construct_data(ar, ns, file_version);
}
template void
NormalizationStrategyACSF::access::serialize<boost::archive::text_oarchive>(boost::archive::text_oarchive&,
NormalizationStrategyACSF&,
const unsigned int);
}
template<class Archive>
void serialize(Archive& ar,
lib4neuro::NormalizationStrategyACSF& ns,
const unsigned int version) {}
namespace boost::serialization {
/**
* Serialization function
* @tparam Archive Boost library template
* @param ar Boost parameter - filled automatically during serialization!
* @param ns NormalizationStrategyACSF instance
* @param version Boost parameter - filled automatically during serialization!
*/
template<class Archive>
void serialize(Archive& ar,
lib4neuro::NormalizationStrategyACSF& ns,
const unsigned int version) {
lib4neuro::NormalizationStrategyACSF::access::serialize(ar,
ns,
version);
}
} // namespace boost::serialization
#endif //LIB4NEURO_NORMALIZATIONSTRATEGYACSFSERIALIZATION_H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment