Skip to content
Snippets Groups Projects
DataSet.cpp 16.4 KiB
Newer Older
Martin Beseda's avatar
Martin Beseda committed

#include <boost/serialization/export.hpp>
BOOST_CLASS_EXPORT_IMPLEMENT(lib4neuro::DataSet);
        this->n_elements             = 0;
        this->input_dim              = 0;
        this->output_dim             = 0;
Martin Beseda's avatar
Martin Beseda committed
        this->normalization_strategy = std::make_shared<DoubleUnitStrategy>(DoubleUnitStrategy());
    DataSet::DataSet(std::string file_path) {
        std::ifstream ifs(file_path);
Martin Beseda's avatar
Martin Beseda committed
        if (ifs.is_open()) {
            try {
                boost::archive::text_iarchive ia(ifs);
                ia >> *this;
            }
            catch (boost::archive::archive_exception& e) {
Martin Beseda's avatar
Martin Beseda committed
                THROW_RUNTIME_ERROR(
                    "Serialized archive error: '" + e.what() + "'! Please, check if your file is really "
                                                               "the serialized DataSet.");
            THROW_RUNTIME_ERROR("File " + file_path + " couldn't be open!");
Martin Beseda's avatar
Martin Beseda committed
        this->normalization_strategy = std::make_shared<DoubleUnitStrategy>(DoubleUnitStrategy());

Martin Beseda's avatar
Martin Beseda committed
    DataSet::DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>>* data_ptr,
        this->data       = *data_ptr;
        this->input_dim  = this->data[0].first.size();
        this->output_dim = this->data[0].second.size();
Martin Beseda's avatar
Martin Beseda committed
        if (ns) {
Martin Beseda's avatar
Martin Beseda committed
            std::shared_ptr<NormalizationStrategy> ns_tmp;
            ns_tmp.reset(ns);
            this->normalization_strategy = ns_tmp;
        //TODO check the complete data set for input/output dimensions
    }
    DataSet::DataSet(double lower_bound,
                     double upper_bound,
                     unsigned int size,
                     double output,
                     NormalizationStrategy* ns) {
        std::vector<std::pair<std::vector<double>, std::vector<double>>> new_data_vec;
        this->data       = new_data_vec;
        this->input_dim  = 1;
Martin Beseda's avatar
Martin Beseda committed
        if (ns) {
            std::shared_ptr<NormalizationStrategy> ns_tmp(ns);
Martin Beseda's avatar
Martin Beseda committed
            this->normalization_strategy = ns_tmp;
Martin Beseda's avatar
Martin Beseda committed
        this->add_isotropic_data(lower_bound,
                                 upper_bound,
                                 size,
                                 output);
Martin Beseda's avatar
Martin Beseda committed
    DataSet::DataSet(std::vector<double>& bounds,
                     unsigned int no_elems_in_one_dim,
Martin Beseda's avatar
Martin Beseda committed
                     std::vector<double> (* output_func)(std::vector<double>&),
                     unsigned int output_dim,
                     NormalizationStrategy* ns) {
        std::vector<std::pair<std::vector<double>, std::vector<double>>> new_data_vec;
        this->data       = new_data_vec;
        this->input_dim  = bounds.size() / 2;
        this->output_dim = output_dim;
        this->n_elements = 0;
Martin Beseda's avatar
Martin Beseda committed
        if (ns) {
Martin Beseda's avatar
Martin Beseda committed
            std::shared_ptr<NormalizationStrategy> ns_tmp;
            ns_tmp.reset(ns);
            this->normalization_strategy = ns_tmp;
Martin Beseda's avatar
Martin Beseda committed
        this->add_isotropic_data(bounds,
                                 no_elems_in_one_dim,
                                 output_func);
    DataSet::~DataSet() {
    /**
     * Shifts the outputs of all data pairs so that the outputs of the first
     * pair become zero.
     *
     * The output vector of the first pair is subtracted component-wise from the
     * output vector of every stored pair (the first one included). The first
     * pair is accessed through at(0), so the call throws std::out_of_range when
     * no data is stored.
     */
    void DataSet::shift_outputs_to_zero() {
        /* Deliberately a copy: the first pair itself is modified in the loop below,
         * so a reference would already hold zeros for all following pairs. */
        const auto reference_output = this->data.at(0).second;
        const size_t n_outputs      = this->get_output_dim();

        for (auto& data_pair : this->data) {
            for (size_t k = 0; k < n_outputs; ++k) {
                data_pair.second[k] -= reference_output[k];
            }
        }
    }

Martin Beseda's avatar
Martin Beseda committed
    void DataSet::add_data_pair(std::vector<double>& inputs,
                                std::vector<double>& outputs) {
        if (this->n_elements == 0 && this->input_dim == 0 && this->output_dim == 0) {
            this->input_dim  = inputs.size();
            this->output_dim = outputs.size();
        }

            THROW_RUNTIME_ERROR("Bad input dimension.");
        } else if (outputs.size() != this->output_dim) {
            THROW_RUNTIME_ERROR("Bad output dimension.");
Martin Beseda's avatar
Martin Beseda committed
        this->data.emplace_back(std::make_pair(inputs,
                                               outputs));
Martin Beseda's avatar
Martin Beseda committed
    void DataSet::add_isotropic_data(double lower_bound,
                                     double upper_bound,
                                     unsigned int size,
                                     double output) {
        if (this->input_dim != 1 || this->output_dim != 1) {
            THROW_RUNTIME_ERROR("Cannot add data with dimensionality 1:1 when the data set "
                                "is of different dimensionality!");
        double frac;
Martin Beseda's avatar
Martin Beseda committed
        if (size < 1) {
            THROW_INVALID_ARGUMENT_ERROR("Size of added data has to be >=1 !");
        } else if (size == 1) {
            frac = 1;
        } else {
            frac = (upper_bound - lower_bound) / (size - 1);
        }

        for (unsigned int i = 0; i < size; ++i) {
            inp = {frac * i};
Martin Beseda's avatar
Martin Beseda committed
            this->data.emplace_back(std::make_pair(inp,
                                                   out));
Martin Beseda's avatar
Martin Beseda committed
    void DataSet::add_isotropic_data(std::vector<double>& bounds,
                                     unsigned int no_elems_in_one_dim,
                                     std::vector<double> (* output_func)(std::vector<double>&)) {
        std::vector<double>              tmp;
        double                           frac;
Martin Beseda's avatar
Martin Beseda committed
        if (no_elems_in_one_dim < 1) {
            THROW_INVALID_ARGUMENT_ERROR("Number of elements in one dimension has to be >=1 !");
        }
        for (unsigned int i = 0; i < bounds.size(); i += 2) {
Martin Beseda's avatar
Martin Beseda committed
            if (no_elems_in_one_dim == 1) {
                frac = 1;
            } else {
Martin Beseda's avatar
Martin Beseda committed
                frac = (bounds[i] - bounds[i + 1]) / (no_elems_in_one_dim - 1);
            tmp.clear();
            for (double j = bounds[i]; j <= bounds[i + 1]; j += frac) {
                tmp.emplace_back(j);
            }
        grid = this->cartesian_product(&grid);

        for (auto vec : grid) {
            this->n_elements++;
Martin Beseda's avatar
Martin Beseda committed
            this->data.emplace_back(std::make_pair(vec,
                                                   output_func(vec)));
Martin Beseda's avatar
Martin Beseda committed
    std::vector<std::pair<std::vector<double>, std::vector<double>>>* DataSet::get_data() {
    size_t DataSet::get_n_elements() {
        return this->n_elements;
    size_t DataSet::get_input_dim() {
        return this->input_dim;
    }
    size_t DataSet::get_output_dim() {
        return this->output_dim;
    }

    void DataSet::print_data() {
        if (n_elements) {
            for (auto p : this->data) {
                /* INPUT */
                for (auto v : std::get<0>(p)) {
                    std::cout << v << " ";
                }

                std::cout << "-> ";

                /* OUTPUT */
                for (auto v : std::get<1>(p)) {
                    std::cout << v << " ";
                }
    void DataSet::store_text(std::string file_path) {
Martin Beseda's avatar
Martin Beseda committed
        if (!ofs.is_open()) {
            THROW_RUNTIME_ERROR("File " + file_path + " couldn't be open!");
        } else {
            boost::archive::text_oarchive oa(ofs);
            oa << *this;
            ofs.close();
        }
    }

    void DataSet::store_data_text(std::ofstream* file_path) {
        for (auto e : this->data) {
            /* First part of the pair */
            for (unsigned int i = 0; i < e.first.size() - 1; i++) {
                *file_path << this->get_denormalized_value(e.first.at(i)) << ",";
            *file_path << this->get_denormalized_value(e.first.back()) << " ";

            /* Second part of the pair */
            for (unsigned int i = 0; i < e.second.size() - 1; i++) {
                *file_path << this->get_denormalized_value(e.second.at(i)) << ",";
            *file_path << this->get_denormalized_value(e.second.back()) << std::endl;
    void DataSet::store_data_text(std::string file_path) {
        std::ofstream ofs(file_path);

Martin Beseda's avatar
Martin Beseda committed
        if (!ofs.is_open()) {
            THROW_RUNTIME_ERROR("File " + file_path + " couldn't be open!");
        } else {
            this->store_data_text(&ofs);
Martin Beseda's avatar
Martin Beseda committed
    std::vector<std::vector<T>> DataSet::cartesian_product(const std::vector<std::vector<T>>* v) {
        std::vector<std::vector<double>> v_combined_old, v_combined, v_tmp;
        std::vector<double>              tmp;
Martin Beseda's avatar
Martin Beseda committed
        for (const auto& e : v->at(0)) {
        for (unsigned int i = 1; i < v->size(); i++) {  // Iterate through remaining vectors of 'v'
            v_combined_old = v_combined;
            v_combined.clear();

Martin Beseda's avatar
Martin Beseda committed
            for (const auto& e : v->at(i)) {
                for (const auto& vec : v_combined_old) {
                    tmp = vec;
                    tmp.emplace_back(e);

                    /* Add only unique elements */
Martin Beseda's avatar
Martin Beseda committed
                    if (std::find(v_combined.begin(),
                                  v_combined.end(),
                                  tmp) == v_combined.end()) {
Martin Beseda's avatar
Martin Beseda committed
        if (!this->normalization_strategy) {
            THROW_INVALID_ARGUMENT_ERROR("There is no normalization strategy given for this data set, so it can not be "
                                         "normalized!");
Martin Beseda's avatar
Martin Beseda committed
        if (this->max_min_inp_val.empty()) {
            this->max_min_inp_val.emplace_back(this->data.at(0).first.at(0));
            this->max_min_inp_val.emplace_back(this->data.at(0).first.at(0));
        }

        double    tmp, tmp2;
Martin Beseda's avatar
Martin Beseda committed
        for (auto pair : this->data) {
            /* Finding maximum */
            //TODO make more efficient
            tmp  = *std::max_element(pair.first.begin(),
                                     pair.first.end());
Martin Beseda's avatar
Martin Beseda committed
            tmp2 = *std::max_element(pair.second.begin(),
                                     pair.second.end());
Martin Beseda's avatar
Martin Beseda committed
            tmp = std::max(tmp,
                           tmp2);
            /* Testing for a new maximum */
            if (tmp > this->max_min_inp_val.at(0)) {
                this->max_min_inp_val.at(0) = tmp;
            tmp  = *std::min_element(pair.first.begin(),
                                     pair.first.end());
Martin Beseda's avatar
Martin Beseda committed
            tmp2 = *std::min_element(pair.second.begin(),
                                     pair.second.end());
Martin Beseda's avatar
Martin Beseda committed
            tmp = std::min(tmp,
                           tmp2);
            /* Testing for a new minimum */
            if (tmp < this->max_min_inp_val.at(1)) {
                this->max_min_inp_val.at(1) = tmp;
Martin Beseda's avatar
Martin Beseda committed
        for (auto& pair : this->data) {
            for (auto& v : pair.first) {
                v = this->normalization_strategy->normalize(v,
                                                            this->max_min_inp_val.at(0),
                                                            this->max_min_inp_val.at(1));
Martin Beseda's avatar
Martin Beseda committed
            for (auto& v : pair.second) {
                v = this->normalization_strategy->normalize(v,
                                                            this->max_min_inp_val.at(0),
                                                            this->max_min_inp_val.at(1));
Martin Beseda's avatar
Martin Beseda committed
    double DataSet::get_normalized_value(double val) {
        if (!this->normalized || !this->normalization_strategy) {
Martin Beseda's avatar
Martin Beseda committed
        return this->normalization_strategy->normalize(val,
                                                       this->max_min_inp_val.at(0),
                                                       this->max_min_inp_val.at(1));
    double DataSet::get_denormalized_value(double val) {
        if (!this->normalized || !this->normalization_strategy) {
            return val;
        }
        return this->normalization_strategy->de_normalize(val);
Martin Beseda's avatar
Martin Beseda committed
    /**
     * Copies the input vector of the data pair stored at position idx into d.
     *
     * @param d   Destination vector. It is expected to already have the same
     *            length as the stored input vector; this is verified only by
     *            an assert.
     * @param idx Position of the data pair; accessed without bounds checking.
     */
    void DataSet::get_input(std::vector<double>& d,
                            size_t idx) {
        assert(d.size() == this->data[idx].first.size());

        const auto& stored_input = this->data[idx].first;
        size_t      position     = 0;
        for (const auto& value : stored_input) {
            d[position++] = value;
        }
    }

Martin Beseda's avatar
Martin Beseda committed
    /**
     * Copies the output vector of the data pair stored at position idx into d.
     *
     * @param d   Destination vector. It is expected to already have the same
     *            length as the stored output vector; this is verified only by
     *            an assert.
     * @param idx Position of the data pair; accessed without bounds checking.
     */
    void DataSet::get_output(std::vector<double>& d,
                             size_t idx) {
        assert(d.size() == this->data[idx].second.size());

        const auto& stored_output = this->data[idx].second;
        size_t      position      = 0;
        for (const auto& value : stored_output) {
            d[position++] = value;
        }
    }

    void DataSet::de_normalize() {
        std::vector<double> tmp_inp(this->data.at(0).first.size());
        std::vector<double> tmp_out(this->data.at(0).second.size());

Martin Beseda's avatar
Martin Beseda committed
        for (auto& pair: this->data) {
            for (size_t i = 0; i < pair.first.size(); i++) {
                tmp_inp.at(i) = this->normalization_strategy->de_normalize(pair.first.at(i));
            }
            pair.first = tmp_inp;
        }

Martin Beseda's avatar
Martin Beseda committed
        for (auto& pair: this->data) {
            for (size_t i = 0; i < pair.second.size(); i++) {
                tmp_out.at(i) = this->normalization_strategy->de_normalize(pair.second.at(i));
            }
            pair.second = tmp_out;
        }

        /* Clear the stored maximal and minimal values, because is_normalized() reports the state by checking whether they are empty */
        this->max_min_inp_val.clear();
Martin Beseda's avatar
Martin Beseda committed
    void DataSet::de_normalize_single(std::vector<double>& d1,
                                      std::vector<double>& d2) {
        assert(d1.size() == d2.size());
        for (size_t j = 0; j < d1.size(); ++j) {
            d2[j] = this->normalization_strategy->de_normalize(d1[j]);
    NormalizationStrategy* DataSet::get_normalization_strategy() {
Martin Beseda's avatar
Martin Beseda committed
        return this->normalization_strategy.get();
    /**
     * Replaces the normalization strategy used by this data set.
     *
     * The passed object is handed over to the `normalization_strategy` smart
     * pointer via reset(), i.e. the data set takes ownership of it. A null
     * pointer is ignored and the current strategy is kept.
     *
     * @param ns Raw pointer to the new strategy, or nullptr to keep the current one.
     */
    void DataSet::set_normalization_strategy(NormalizationStrategy* ns) {
        /* Handing back the pointer that is already managed (e.g. the one returned
         * by get_normalization_strategy()) has to be a no-op: reset() with the
         * managed pointer would create a second, independent owner of the same
         * object and the object would end up being deleted twice. */
        if (ns && ns != this->normalization_strategy.get()) {
            this->normalization_strategy.reset(ns);
        }
    }

    /**
     * Reports the normalization state of the data set.
     *
     * @return true when `max_min_inp_val` holds at least one value, false when
     *         it is empty.
     */
    bool DataSet::is_normalized() {
        const bool extremes_stored = !this->max_min_inp_val.empty();
        return extremes_stored;
    }

    double DataSet::get_max_inp_val() {

    /**
     * Method returning a random number of data pairs, between 1 and max
     */
    std::vector<std::pair<std::vector<double>, std::vector<double>>> DataSet::get_random_data_batch(size_t max) {
Martin Beseda's avatar
Martin Beseda committed
        if (max <= 0) {
        } else {
            std::vector<std::pair<std::vector<double>, std::vector<double>>> newData;
Martin Beseda's avatar
Martin Beseda committed
            srand(time(NULL));  //TODO use Mersen twister from Boost
Martin Beseda's avatar
Martin Beseda committed
            size_t n_chosen = rand() % std::min(max,
                                                this->data.size()) + 1;
Martin Beseda's avatar
Martin Beseda committed
            n_chosen = max;
            std::vector<size_t> chosens;
            size_t              chosen;
            for (size_t i = 0; i < n_chosen; i++) {
Martin Beseda's avatar
Martin Beseda committed
                chosen = rand() % this->data.size();
Martin Beseda's avatar
Martin Beseda committed
                auto it = std::find(chosens.begin(),
                                    chosens.end(),
                                    chosen);
                    i--;
                } else {
                    newData.push_back(this->data.at(chosen));
Martin Beseda's avatar
Martin Beseda committed
                    chosens.push_back(chosen);
	
	/**
	 * Appends n_columns zero-valued entries to the output vector of each of the
	 * first `n_elements` data pairs and increases `output_dim` by n_columns.
	 *
	 * @param n_columns Number of zero columns to append.
	 */
	void DataSet::add_zero_output_columns(size_t n_columns)
	{
		/* With nothing to append no data pair is touched at all. */
		if (n_columns > 0)
		{
			for (size_t row = 0; row < this->n_elements; ++row)
			{
				auto& outputs = this->data.at(row).second;
				outputs.insert(outputs.end(), n_columns, 0.0);
			}
		}
		this->output_dim += n_columns;
	}