Skip to content
Snippets Groups Projects
DataSet.cpp 9.36 KiB
Newer Older
Martin Beseda's avatar
Martin Beseda committed
//
// Created by martin on 7/13/18.
//

    /**
     * Creates an empty data set: no elements and input/output dimensions of 0.
     *
     * While all three counters are zero, add_data_pair() adopts the dimensions
     * of the first pair that is added.
     */
    DataSet::DataSet() {
        input_dim = 0;
        output_dim = 0;
        n_elements = 0;
    }
    /**
     * Restores a data set from a Boost text archive.
     *
     * @param file_path Path of the archive file to read.
     */
    DataSet::DataSet(std::string file_path) {
        std::ifstream archive_file(file_path);
        boost::archive::text_iarchive reader(archive_file);

        /* Deserialize straight into the object under construction */
        reader >> *this;

        archive_file.close();
    }
    /**
     * Builds a data set by copying already prepared (input, output) pairs.
     *
     * The input/output dimensions are taken from the first pair. An empty
     * container yields a data set whose dimensions are both 0.
     *
     * @param data_ptr Pointer to the pairs to copy; it is dereferenced
     *                 unconditionally and therefore must not be null.
     * @param ns       Optional normalization strategy. When non-null it is stored
     *                 and its maximum/minimum values are read into the data set.
     */
    DataSet::DataSet(std::vector<std::pair<std::vector<double>,
                     std::vector<double>>> *data_ptr,
                     NormalizationStrategy* ns) {
        this->data = *data_ptr;
        this->n_elements = this->data.size();

        /* Reading data[0] of an empty container would be out of bounds */
        if (this->data.empty()) {
            this->input_dim = 0;
            this->output_dim = 0;
        } else {
            this->input_dim = this->data.front().first.size();
            this->output_dim = this->data.front().second.size();
        }

        if(ns) {
            this->normalization_strategy = ns;
            this->max_inp_val = this->normalization_strategy->get_max_value();
            this->min_inp_val = this->normalization_strategy->get_min_value();
        }

        //TODO check the complete data set for input/output dimensions
    }
    /**
     * Builds a data set with one input and one output dimension and fills it
     * through the scalar overload of add_isotropic_data().
     *
     * @param lower_bound Forwarded to add_isotropic_data().
     * @param upper_bound Forwarded to add_isotropic_data().
     * @param size        Forwarded to add_isotropic_data().
     * @param output      Forwarded to add_isotropic_data().
     * @param ns          Optional normalization strategy. When non-null it is stored
     *                    and its maximum/minimum values are read into the data set.
     */
    DataSet::DataSet(double lower_bound,
                     double upper_bound,
                     unsigned int size,
                     double output,
                     NormalizationStrategy* ns) {
        /* Start from an empty 1:1 data set */
        this->data.clear();
        this->input_dim = 1;
        this->output_dim = 1;
        this->n_elements = 0;

        if (ns != nullptr) {
            this->normalization_strategy = ns;
            this->max_inp_val = ns->get_max_value();
            this->min_inp_val = ns->get_min_value();
        }

        this->add_isotropic_data(lower_bound, upper_bound, size, output);
    }
    /**
     * Builds a data set on a regular grid and fills it through the grid overload
     * of add_isotropic_data().
     *
     * @param bounds              Flat list of bounds; half of its length becomes the
     *                            input dimension. Forwarded to add_isotropic_data().
     * @param no_elems_in_one_dim Forwarded to add_isotropic_data().
     * @param output_func         Forwarded to add_isotropic_data().
     * @param output_dim          Output dimension recorded for the data set.
     * @param ns                  Optional normalization strategy. When non-null it is
     *                            stored and its maximum/minimum values are read into
     *                            the data set.
     */
    DataSet::DataSet(std::vector<double> &bounds,
                     unsigned int no_elems_in_one_dim,
                     std::vector<double> (*output_func)(std::vector<double> &),
                     unsigned int output_dim,
                     NormalizationStrategy* ns) {
        /* Start from an empty data set with the requested dimensionality */
        this->data.clear();
        this->n_elements = 0;
        this->output_dim = output_dim;
        this->input_dim = bounds.size() / 2;

        if (ns != nullptr) {
            this->normalization_strategy = ns;
            this->max_inp_val = ns->get_max_value();
            this->min_inp_val = ns->get_min_value();
        }

        this->add_isotropic_data(bounds, no_elems_in_one_dim, output_func);
    }
    /**
     * Appends one (input, output) pair to the data set.
     *
     * If the data set holds no elements and has no dimensions assigned yet,
     * the dimensions of this pair are adopted first.
     *
     * @param inputs  Input vector of the new pair.
     * @param outputs Output vector of the new pair.
     * @throws InvalidDimension if either vector's size differs from the
     *         corresponding dimension of the data set.
     */
    void DataSet::add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs) {
        if(this->n_elements == 0 && this->input_dim == 0 && this->output_dim == 0) {
            this->input_dim = inputs.size();
            this->output_dim = outputs.size();
        }

        if (inputs.size() != this->input_dim) {
            throw InvalidDimension("Bad input dimension.");
        }
        if (outputs.size() != this->output_dim) {
            throw InvalidDimension("Bad output dimension.");
        }

        /* Build the pair in place and count it only once it is actually stored */
        this->data.emplace_back(inputs, outputs);
        this->n_elements++;
    }
    /**
     * Adds `size` evenly spaced one-dimensional samples covering
     * [lower_bound, upper_bound], all sharing the same scalar output.
     *
     * @param lower_bound Input value of the first sample.
     * @param upper_bound Input value of the last sample (when size > 1).
     * @param size        Number of samples to add; 0 adds nothing.
     * @param output      Output value stored with every sample.
     * @throws InvalidDimension if the data set is not of dimensionality 1:1.
     */
    void DataSet::add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output) {
        if (this->input_dim != 1 || this->output_dim != 1) {
            throw InvalidDimension("Cannot add data with dimensionality 1:1 when the data set "
                                   "is of different dimensionality!");
        }

        /* A single sample has no spacing; this also avoids dividing by (size - 1) == 0 */
        const double frac = (size > 1) ? (upper_bound - lower_bound) / (size - 1) : 0.0;

        const std::vector<double> out = {output};
        for (unsigned int i = 0; i < size; ++i) {
            /* Samples start at lower_bound, not at zero */
            this->data.emplace_back(std::vector<double>{lower_bound + frac * i}, out);
        }

        /* Keep the element counter in step with the stored data */
        this->n_elements += size;
    }
    /**
     * Adds samples lying on a regular grid and computes their outputs with a
     * user-supplied function.
     *
     * @param bounds              Flat list of (lower, upper) bounds, one pair per input
     *                            axis. A trailing unpaired value is ignored.
     * @param no_elems_in_one_dim Number of evenly spaced coordinates generated per axis.
     * @param output_func         Function called once per grid point to obtain its output.
     */
    void DataSet::add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
                                     std::vector<double> (*output_func)(std::vector<double> &)) {
        // TODO add check of dataset dimensions

        /* Build the list of coordinates along every axis */
        std::vector<std::vector<double>> grid;
        for (size_t i = 0; i + 1 < bounds.size(); i += 2) {
            const double lower = bounds[i];
            const double upper = bounds[i + 1];

            /* Step is the interval length split into (n - 1) parts; a single point needs no step */
            const double frac = (no_elems_in_one_dim > 1)
                                ? (upper - lower) / (no_elems_in_one_dim - 1)
                                : 0.0;

            /* Index-based generation always yields exactly n points, unlike accumulating the step */
            std::vector<double> axis;
            axis.reserve(no_elems_in_one_dim);
            for (unsigned int k = 0; k < no_elems_in_one_dim; ++k) {
                axis.emplace_back(lower + frac * k);
            }

            grid.emplace_back(axis);
        }

        /* Without any axis there is nothing to combine */
        if (grid.empty()) {
            return;
        }

        grid = this->cartesian_product(&grid);

        for (auto &vec : grid) {
            this->data.emplace_back(vec, output_func(vec));
            this->n_elements++;
        }
    }
    std::vector<std::pair<std::vector<double>, std::vector<double>>> *DataSet::get_data() {
        return &(this->data);
    }
    /**
     * @return The value of the element counter of the data set.
     */
    size_t DataSet::get_n_elements() {
        return this->n_elements;
    }
    size_t DataSet::get_input_dim() {
        return this->input_dim;
    }
    size_t DataSet::get_output_dim() {
        return this->output_dim;
    }

    /**
     * Prints every stored pair to standard output, one pair per line, in the form
     * "<inputs> -> <outputs>". Nothing is printed when the element counter is 0.
     */
    void DataSet::print_data() {
        if (n_elements) {
            for (const auto &p : this->data) {
                /* INPUT */
                for (const auto &v : std::get<0>(p)) {
                    std::cout << v << " ";
                }

                std::cout << "-> ";

                /* OUTPUT */
                for (const auto &v : std::get<1>(p)) {
                    std::cout << v << " ";
                }

                /* Terminate the record so consecutive pairs do not run together */
                std::cout << '\n';
            }
        }
    }
    /**
     * Serializes the data set into a Boost text archive.
     *
     * @param file_path Path of the file the archive is written to.
     */
    void DataSet::store_text(std::string &file_path) {
        //TODO check if stream was successfully opened
        std::ofstream ofs(file_path);
        boost::archive::text_oarchive oa(ofs);
        oa << *this;
        ofs.close();
    }
    /**
     * Computes the Cartesian product of the given vectors.
     *
     * Each resulting tuple holds one element from every input vector, in the order
     * of the input vectors. From the second vector on, a tuple is added only if an
     * equal tuple is not already present in the result.
     *
     * @tparam T Element type; it must be comparable with operator== (std::find is used).
     * @param v  Pointer to the vectors to combine; at(0) throws std::out_of_range
     *           when the outer vector is empty.
     * @return   The combined tuples.
     */
    template<class T>
    std::vector<std::vector<T>> DataSet::cartesian_product(const std::vector<std::vector<T>> *v) {
        std::vector<std::vector<T>> v_combined_old, v_combined;
        std::vector<T> tmp;

        /* Seed the result with one-element tuples taken from the first vector */
        for (const auto &e : v->at(0)) {
            v_combined.emplace_back(std::vector<T>(1, e));
        }

        for (unsigned int i = 1; i < v->size(); i++) {  // Iterate through remaining vectors of 'v'
            v_combined_old = v_combined;
            v_combined.clear();

            /* Extend every tuple built so far by every element of the i-th vector */
            for (const auto &e : v->at(i)) {
                for (const auto &vec : v_combined_old) {
                    tmp = vec;
                    tmp.emplace_back(e);

                    /* Add only unique elements */
                    if (std::find(v_combined.begin(), v_combined.end(), tmp) == v_combined.end()) {
                        v_combined.emplace_back(tmp);
                    }
                }
            }
        }

        return v_combined;
    }
//        if(this->normalized) {
//            throw std::runtime_error("This data set is already normalized!");
//        }

        /*
         * NOTE(review): the signature of the function owning the statements below is
         * not visible in this listing, and its closing braces are missing as well.
         * Presumably this is the body of the data set's normalization routine - confirm.
         */

        /* Find maximum and minimum values */
        /* Both extremes are seeded with the first input value of the first pair;
         * this reads data[0] without checking that the data set is non-empty. */
        this->max_inp_val =  this->min_inp_val = this->data[0].first.at(0);
        double tmp, tmp2;
        /* Note: 'auto pair' iterates over copies of the stored pairs */
        for(auto pair : this->data) {
            /* Finding maximum */
            //TODO make more efficiently
            tmp = *std::max_element(pair.first.begin(), pair.first.end());
            tmp2 = *std::max_element(pair.second.begin(), pair.second.end());

            /* Largest value among the inputs and outputs of this pair */
            tmp = std::max(tmp, tmp2);

            if (tmp > this->max_inp_val) {
                this->max_inp_val = tmp;
            }

            /* Finding minimum */
            tmp = *std::min_element(pair.first.begin(), pair.first.end());
            tmp2 = *std::min_element(pair.second.begin(), pair.second.end());

            /* Smallest value among the inputs and outputs of this pair */
            tmp = std::min(tmp, tmp2);

            if (tmp < this->min_inp_val) {
                this->min_inp_val = tmp;
            }
        }

        /* Normalize every number in the data set */
        for(auto& pair : this->data) {
            for(auto& v : pair.first) {
                v = this->normalization_strategy->normalize(v, this->max_inp_val, this->min_inp_val);
                /* NOTE(review): the statement below repeats the previous one, so each input is
                 * normalized twice as written. Presumably it belongs to a separate loop over
                 * pair.second that is missing from this listing - confirm. */
                v = this->normalization_strategy->normalize(v, this->max_inp_val, this->min_inp_val);
    /**
     * Copies the input vector of one stored pair into a caller-provided buffer.
     *
     * @param d   Destination; its size must equal the size of the stored input
     *            (checked with assert).
     * @param idx Index of the pair to read.
     */
    void DataSet::get_input(std::vector<double> &d, size_t idx){
        assert(d.size() == this->data[idx].first.size());

        const std::vector<double> &source = this->data[idx].first;
        size_t pos = 0;
        for (const double value : source) {
            d[pos] = value;
            ++pos;
        }
    }

    /**
     * Copies the output vector of one stored pair into a caller-provided buffer.
     *
     * @param d   Destination; its size must equal the size of the stored output
     *            (checked with assert).
     * @param idx Index of the pair to read.
     */
    void DataSet::get_output(std::vector<double> &d, size_t idx){
        assert(d.size() == this->data[idx].second.size());

        const std::vector<double> &source = this->data[idx].second;
        size_t pos = 0;
        for (const double value : source) {
            d[pos] = value;
            ++pos;
        }
    }

    /**
     * De-normalizes a single vector element by element using the stored
     * normalization strategy.
     *
     * @param d1 Values to de-normalize.
     * @param d2 Destination for the de-normalized values; must have the same size
     *           as d1 (checked with assert).
     */
    void DataSet::de_normalize_single(std::vector<double> &d1, std::vector<double> &d2){
        assert(d1.size() == d2.size());
        for (size_t j = 0; j < d1.size(); ++j) {
            d2[j] = this->normalization_strategy->de_normalize(d1[j]);
        }
    }
    /**
     * @return Pointer to the normalization strategy stored in the data set.
     */
    NormalizationStrategy* DataSet::get_normalization_strategy() {
        return normalization_strategy;
    }

//    bool DataSet::is_normalized() {
//        return this->normalized;
//    }

    double DataSet::get_max_inp_val() {
        return this->max_inp_val;
    }

    double DataSet::get_min_inp_val() {
        return this->min_inp_val;
    }