Newer
Older
Martin Beseda
committed
#include <algorithm>
#include <stdexcept>

#include "DataSetSerialization.h"
Martin Beseda
committed
namespace lib4neuro {
Martin Beseda
committed
Martin Beseda
committed
/**
 * Creates a data set with no elements and with both the input and the
 * output dimension set to zero.
 */
DataSet::DataSet() {
    /* All three counters start at zero */
    output_dim = 0;
    input_dim = 0;
    n_elements = 0;
}
Martin Beseda
committed
/**
 * Creates a data set by deserializing it from a Boost text archive.
 *
 * @param file_path Path to the file holding the serialized data set
 * @throws std::runtime_error when the file cannot be opened for reading
 */
DataSet::DataSet(std::string file_path) {
    std::ifstream ifs(file_path);
    if (!ifs.is_open()) {
        /* Fail early with a readable message instead of handing a dead stream to the archive */
        throw std::runtime_error("File '" + file_path + "' could not be opened for reading!");
    }

    boost::archive::text_iarchive ia(ifs);
    ia >> *this;
    /* The stream is closed by its destructor */
}
Martin Beseda
committed
/**
 * Creates a data set holding a copy of the given (input, output) pairs.
 *
 * The input/output dimensions are taken from the first pair; an empty
 * vector yields a data set with both dimensions equal to zero.
 *
 * @param data_ptr Pointer to the vector of (input, output) pairs to copy
 * @param ns Normalization strategy to use; may be null, in which case the
 *           strategy and the stored extreme values are left untouched
 * @throws std::invalid_argument when data_ptr is null
 * @throws InvalidDimension when a pair differs in dimension from the first one
 */
DataSet::DataSet(std::vector<std::pair<std::vector<double>,
                                       std::vector<double>>> *data_ptr,
                 NormalizationStrategy* ns) {

    if (data_ptr == nullptr) {
        throw std::invalid_argument("Data set cannot be constructed from a null data pointer!");
    }

    this->data = *data_ptr;
    this->n_elements = this->data.size();

    /* Reading data[0] of an empty vector would be undefined behaviour */
    if (this->data.empty()) {
        this->input_dim = 0;
        this->output_dim = 0;
    } else {
        this->input_dim = this->data[0].first.size();
        this->output_dim = this->data[0].second.size();
    }

    /* Every pair has to agree with the dimensions of the first one */
    for (const auto &sample : this->data) {
        if (sample.first.size() != this->input_dim) {
            throw InvalidDimension("Bad input dimension.");
        }
        if (sample.second.size() != this->output_dim) {
            throw InvalidDimension("Bad output dimension.");
        }
    }

    if (ns) {
        this->normalization_strategy = ns;
        this->max_inp_val = this->normalization_strategy->get_max_value();
        this->min_inp_val = this->normalization_strategy->get_min_value();
    }
}
Martin Beseda
committed
/**
 * Creates a data set with one input and one output dimension and fills it
 * through add_isotropic_data(lower_bound, upper_bound, size, output).
 *
 * @param lower_bound Forwarded to add_isotropic_data
 * @param upper_bound Forwarded to add_isotropic_data
 * @param size Forwarded to add_isotropic_data
 * @param output Forwarded to add_isotropic_data
 * @param ns Normalization strategy; when null, the strategy and the stored
 *           extreme values are not set here
 */
DataSet::DataSet(double lower_bound,
                 double upper_bound,
                 unsigned int size,
                 double output,
                 NormalizationStrategy* ns) {

    /* Start from an empty 1:1 data set */
    this->data.clear();
    this->n_elements = 0;
    this->input_dim = 1;
    this->output_dim = 1;

    if (ns != nullptr) {
        this->normalization_strategy = ns;
        this->max_inp_val = ns->get_max_value();
        this->min_inp_val = ns->get_min_value();
    }

    this->add_isotropic_data(lower_bound, upper_bound, size, output);
}
/**
 * Creates a data set whose input dimension is half the length of `bounds`
 * and fills it through add_isotropic_data(bounds, no_elems_in_one_dim, output_func).
 *
 * @param bounds Forwarded to add_isotropic_data; its length determines the input dimension
 * @param no_elems_in_one_dim Forwarded to add_isotropic_data
 * @param output_func Function computing the output vector for an input vector
 * @param output_dim Output dimension stored in the data set
 * @param ns Normalization strategy; when null, the strategy and the stored
 *           extreme values are not set here
 */
DataSet::DataSet(std::vector<double> &bounds,
                 unsigned int no_elems_in_one_dim,
                 std::vector<double> (*output_func)(std::vector<double> &),
                 unsigned int output_dim,
                 NormalizationStrategy* ns) {

    /* Start from an empty data set of the requested dimensions */
    this->data.clear();
    this->n_elements = 0;
    this->input_dim = bounds.size() / 2;
    this->output_dim = output_dim;

    if (ns != nullptr) {
        this->normalization_strategy = ns;
        this->max_inp_val = ns->get_max_value();
        this->min_inp_val = ns->get_min_value();
    }

    this->add_isotropic_data(bounds, no_elems_in_one_dim, output_func);
}
Martin Beseda
committed
/**
 * Appends one (inputs, outputs) pair to the data set.
 *
 * When the data set has no elements and both dimensions are zero, the
 * dimensions are adopted from the given pair.
 *
 * @param inputs Input vector of the new pair
 * @param outputs Output vector of the new pair
 * @throws InvalidDimension when the size of either vector differs from the
 *         corresponding dimension of the data set
 */
void DataSet::add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs) {
    const bool dims_unset = (this->n_elements == 0)
                            && (this->input_dim == 0)
                            && (this->output_dim == 0);
    if (dims_unset) {
        this->input_dim = inputs.size();
        this->output_dim = outputs.size();
    }

    /* The input dimension is validated first, then the output dimension */
    if (inputs.size() != this->input_dim) {
        throw InvalidDimension("Bad input dimension.");
    }
    if (outputs.size() != this->output_dim) {
        throw InvalidDimension("Bad output dimension.");
    }

    this->n_elements++;
    this->data.emplace_back(inputs, outputs);
}
Martin Beseda
committed
void DataSet::add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output) {
Martin Beseda
committed
if (this->input_dim != 1 || this->output_dim != 1) {
throw InvalidDimension("Cannot add data with dimensionality 1:1 when the data set "
"is of different dimensionality!");
}
Martin Beseda
committed
double frac = (upper_bound - lower_bound) / (size - 1);
std::vector<double> inp, out;
Martin Beseda
committed
out = {output};
Martin Beseda
committed
for (unsigned int i = 0; i < size; ++i) {
inp = {frac * i};
this->data.emplace_back(std::make_pair(inp, out));
}
Martin Beseda
committed
this->n_elements += size;
Martin Beseda
committed
void DataSet::add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
std::vector<double> (*output_func)(std::vector<double> &)) {
// TODO add check of dataset dimensions
Martin Beseda
committed
std::vector<std::vector<double>> grid;
std::vector<double> tmp;
double frac;
Martin Beseda
committed
for (unsigned int i = 0; i < bounds.size(); i += 2) {
frac = (bounds[i] + bounds[i + 1]) / (no_elems_in_one_dim - 1);
tmp.clear();
for (double j = bounds[i]; j <= bounds[i + 1]; j += frac) {
tmp.emplace_back(j);
}
Martin Beseda
committed
grid.emplace_back(tmp);
Martin Beseda
committed
grid = this->cartesian_product(&grid);
for (auto vec : grid) {
this->n_elements++;
this->data.emplace_back(std::make_pair(vec, output_func(vec)));
}
Martin Beseda
committed
std::vector<std::pair<std::vector<double>, std::vector<double>>> *DataSet::get_data() {
return &(this->data);
}
Martin Beseda
committed
/**
 * Returns the number of elements tracked by the data set.
 *
 * @return Current value of `n_elements`
 */
size_t DataSet::get_n_elements() {
    return this->n_elements;
}
Martin Beseda
committed
size_t DataSet::get_input_dim() {
return this->input_dim;
}
Martin Beseda
committed
size_t DataSet::get_output_dim() {
return this->output_dim;
}
/**
 * Prints the data set to the standard output, one pair per line in the
 * form "in_1 in_2 ... -> out_1 out_2 ...". Prints nothing when the data
 * set has no elements.
 */
void DataSet::print_data() {
    if (n_elements) {
        /* Iterate by reference; copying every pair just to print it is wasteful */
        for (const auto &p : this->data) {
            /* INPUT */
            for (const auto &v : p.first) {
                std::cout << v << " ";
            }

            std::cout << "-> ";

            /* OUTPUT */
            for (const auto &v : p.second) {
                std::cout << v << " ";
            }

            std::cout << std::endl;
        }
    }
}
Martin Beseda
committed
/**
 * Serializes the data set into a Boost text archive stored in a file.
 *
 * @param file_path Path of the file to write
 * @throws std::runtime_error when the file cannot be opened for writing
 */
void DataSet::store_text(std::string &file_path) {
    std::ofstream ofs(file_path);
    if (!ofs.is_open()) {
        throw std::runtime_error("File '" + file_path + "' could not be opened for writing!");
    }

    {
        /* Scoped so that the archive is destroyed before the stream is closed */
        boost::archive::text_oarchive oa(ofs);
        oa << *this;
    }
    ofs.close();
}
Martin Beseda
committed
/**
 * Builds the cartesian product of the vectors stored in `v`.
 *
 * Every resulting vector takes its i-th element from the i-th vector of
 * `v`. Combinations produced more than once (because of repeated values
 * in the second or a later vector, or repeated prefixes) are stored only once.
 *
 * @tparam T Element type; has to be equality comparable
 * @param v Pointer to the vectors to combine
 * @return Vector of all unique combinations
 * @throws std::out_of_range when `v` points to an empty vector (from `at(0)`)
 */
template<class T>
std::vector<std::vector<T>> DataSet::cartesian_product(const std::vector<std::vector<T>> *v) {
    /* Containers are typed by T so the template works for types other than double */
    std::vector<std::vector<T>> v_combined_old, v_combined;
    std::vector<T> tmp;

    /* Seed the result with one-element vectors built from the first vector */
    for (const auto &e : v->at(0)) {
        tmp = {e};
        v_combined.emplace_back(tmp);
    }

    for (size_t i = 1; i < v->size(); i++) {  // Iterate through remaining vectors of 'v'
        /* Move the previous level aside without copying it */
        v_combined_old.swap(v_combined);
        v_combined.clear();

        for (const auto &e : v->at(i)) {
            for (const auto &vec : v_combined_old) {
                tmp = vec;
                tmp.emplace_back(e);

                /* Add only unique elements */
                if (std::find(v_combined.begin(), v_combined.end(), tmp) == v_combined.end()) {
                    v_combined.emplace_back(tmp);
                }
            }
        }
    }

    return v_combined;
}
Martin Beseda
committed
/**
 * Normalizes every input and output value of the data set in place.
 *
 * The smallest and the largest value over ALL inputs and outputs are
 * stored in `min_inp_val` / `max_inp_val` and then passed, together with
 * each value, to the normalization strategy. A data set without any value
 * is left untouched.
 *
 * @throws std::runtime_error when there are values to normalize but no
 *         normalization strategy is set
 */
void DataSet::normalize() {
//    if(this->normalized) {
//        throw std::runtime_error("This data set is already normalized!");
//    }

    /* Find maximum and minimum values in a single pass, without copying the pairs */
    bool has_value = false;
    double max_val = 0.0;
    double min_val = 0.0;

    auto scan = [&](const std::vector<double> &values) {
        for (const double v : values) {
            if (!has_value) {
                max_val = min_val = v;
                has_value = true;
            } else {
                if (v > max_val) {
                    max_val = v;
                }
                if (v < min_val) {
                    min_val = v;
                }
            }
        }
    };

    for (const auto &pair : this->data) {
        scan(pair.first);
        scan(pair.second);
    }

    /* Empty data set (or only empty vectors): nothing to normalize */
    if (!has_value) {
        return;
    }

    if (!this->normalization_strategy) {
        throw std::runtime_error("Data set cannot be normalized without a normalization strategy!");
    }

    this->max_inp_val = max_val;
    this->min_inp_val = min_val;

    /* Normalize every number in the data set */
    for (auto &pair : this->data) {
        for (auto &v : pair.first) {
            v = this->normalization_strategy->normalize(v, this->max_inp_val, this->min_inp_val);
        }

        for (auto &v : pair.second) {
            v = this->normalization_strategy->normalize(v, this->max_inp_val, this->min_inp_val);
        }
    }

//    this->normalized = true;
}
/**
 * Copies the input vector of the pair at position `idx` into `d`.
 *
 * @param d Destination; asserted to have the same size as the stored input vector
 * @param idx Position of the pair in the data set (not range-checked)
 */
void DataSet::get_input(std::vector<double> &d, size_t idx){
    const std::vector<double> &stored_input = this->data[idx].first;
    assert(d.size() == stored_input.size());

    /* Element-wise copy into the caller's buffer; `d` is never resized */
    std::copy(stored_input.begin(), stored_input.end(), d.begin());
}
/**
 * Copies the output vector of the pair at position `idx` into `d`.
 *
 * @param d Destination; asserted to have the same size as the stored output vector
 * @param idx Position of the pair in the data set (not range-checked)
 */
void DataSet::get_output(std::vector<double> &d, size_t idx){
    const std::vector<double> &stored_output = this->data[idx].second;
    assert(d.size() == stored_output.size());

    /* Element-wise copy into the caller's buffer; `d` is never resized */
    std::copy(stored_output.begin(), stored_output.end(), d.begin());
}
/**
 * De-normalizes every value of `d1` with the normalization strategy and
 * writes the results to the same positions of `d2`.
 *
 * @param d1 Values to de-normalize
 * @param d2 Destination; asserted to have the same size as `d1`
 */
void DataSet::de_normalize_single(std::vector<double> &d1, std::vector<double> &d2){
    assert(d1.size() == d2.size());
    for (size_t j = 0; j < d1.size(); ++j) {
        d2[j] = this->normalization_strategy->de_normalize(d1[j]);
    }
}
/**
 * Returns the normalization strategy stored in the data set.
 *
 * @return Current value of the `normalization_strategy` pointer
 */
NormalizationStrategy* DataSet::get_normalization_strategy() {
    return normalization_strategy;
}
// bool DataSet::is_normalized() {
// return this->normalized;
// }
double DataSet::get_max_inp_val() {
return this->max_inp_val;
}
double DataSet::get_min_inp_val() {
return this->min_inp_val;
}