Skip to content
Snippets Groups Projects
DataSet.h 8.66 KiB
Newer Older
Martin Beseda's avatar
Martin Beseda committed
//
// Created by martin on 7/13/18.
//

#ifndef INC_4NEURO_DATASET_H
#define INC_4NEURO_DATASET_H

#include <iostream>
#include <fstream>
#include <utility>
#include <vector>
#include <functional>
Martin Beseda's avatar
Martin Beseda committed
#include <memory>
#include "../NormalizationStrategy/NormalizationStrategy.h"
namespace lib4neuro {
    /**
     * Class representing data, which can be used for training
     * and testing purposes.
     */
    class DataSet {

    private:

        /**
         * Number of elements in the data set (0 by default).
         */
        size_t n_elements = 0;

        /**
         * Dimension of the input (0 by default).
         * NOTE(review): presumably the length of every stored input vector - confirm.
         */
        size_t input_dim = 0;

        /**
         * Dimension of the output (0 by default).
         * NOTE(review): presumably the length of every stored output vector - confirm.
         */
        size_t output_dim = 0;

//        /**
//         * Maximum input value
//         */
//        double max_inp_val = //std::numeric_limits<double>::quiet_NaN();
//
//        /**
//         * Minimum input value
//         */
//        double min_inp_val = std::numeric_limits<double>::quiet_NaN();

        /**
         * Maximum (index 0) and minimum (index 1) input value.
         *
         * The vector is default-constructed (empty); nothing in this
         * declaration guarantees that both indices exist.
         */
        std::vector<double> max_min_inp_val;  //TODO store more efficiently than in a vector!
        /**
         * Stored data in the format of pairs of corresponding
         * input and output vectors.
         *
         * NOTE(review): presumably `first` is the input vector and `second`
         * the output vector of each pair - confirm in the implementation.
         */
        std::vector<std::pair<std::vector<double>, std::vector<double>>> data;

        /**
         * Private helper template; only declared here, its definition is
         * not part of this header section.
         *
         * NOTE(review): judging by the name, this presumably builds the
         * Cartesian product of the vectors stored in @p v - confirm.
         *
         * @tparam T Element type of the vectors
         * @param v Pointer to a vector of vectors (pointee is const)
         * @return Vector of vectors of T, returned by value
         */
        template<class T>
        std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>> *v);

        //TODO let user choose in the constructor!
Martin Beseda's avatar
Martin Beseda committed
        /**
         * Normalization strategy attached to this data set, held through
         * a shared pointer (default-constructed, i.e. empty, until assigned).
         */
        std::shared_ptr<NormalizationStrategy> normalization_strategy;
//        /**
//         *
//         */
//        bool normalized = false;
    public:

        /**
         * Struct used to access private properties from
         * the serialization function.
         *
         * Only forward-declared here; its definition is not part of this
         * header section.
         */
        struct access;

        /**
         * Constructor for an empty DataSet
         */
        LIB4NEURO_API DataSet();

        /**
         * Constructor reading data from the file.
         *
         * The expected file format is not described in this header.
         *
         * @note The constructor is not marked `explicit`, so a std::string
         *       is implicitly convertible to a DataSet.
         *
         * @param file_path Path to the file with stored data set (taken by value)
         */
        LIB4NEURO_API DataSet(std::string file_path);

        /**
         * Constructor accepting data vector.
         *
         * NOTE(review): whether the pointed-to vector is copied or merely
         * referenced is not visible in this header - confirm before
         * destroying the source vector.
         *
         * @param data_ptr Pointer to the vector containing data
         *                 (pairs of input and output vectors)
         * @param ns Optional pointer to a normalization strategy; defaults
         *           to nullptr. NOTE(review): ownership of the pointee is
         *           not visible here - confirm.
         */
        LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>> *data_ptr,
                              NormalizationStrategy* ns = nullptr);

        /**
         * Creates a new data set with input values equidistantly positioned
         * over the certain interval and the output value
         * being constant
         *
         * Both input and output are 1-dimensional
         *
         * @todo add bounds as vectors for multi-dimensional data-sets
         *
         * @param lower_bound Lower bound of the input data interval
         * @param upper_bound Upper bound of the input data interval
         * @param size Number of input-output pairs generated
         * @param output Constant output value
         * @param ns Optional pointer to a normalization strategy; defaults
         *           to nullptr. NOTE(review): ownership of the pointee is
         *           not visible here - confirm.
         */
        LIB4NEURO_API DataSet(double lower_bound,
                              double upper_bound,
                              unsigned int size,
                              double output,
                              NormalizationStrategy* ns = nullptr);

        /**
         * NOTE(review): undocumented in the original. Judging by the
         * parameter names, which match the multi-dimensional
         * add_isotropic_data overload, this presumably creates a data set
         * of equidistant inputs whose outputs are produced by
         * @p output_func - confirm in the implementation.
         *
         * @param bounds Input bounds; presumably alternating lower and upper
         *               bounds, one pair per input dimension - confirm
         * @param no_elems_in_one_dim Presumably the number of generated
         *                            input values along each dimension - confirm
         * @param output_func Pointer to a function taking an input vector
         *                    (by non-const reference) and returning an
         *                    output vector by value
         * @param output_dim Dimension of the output
         * @param ns Optional pointer to a normalization strategy; defaults
         *           to nullptr. NOTE(review): ownership of the pointee is
         *           not visible here - confirm.
         */
        LIB4NEURO_API DataSet(std::vector<double> &bounds,
                              unsigned int no_elems_in_one_dim,
                              std::vector<double> (*output_func)(std::vector<double> &),
                              unsigned int output_dim,
                              NormalizationStrategy* ns = nullptr);

        /**
         * Getter for number of elements
         * @note Not declared const, so it cannot be called on a const DataSet.
         * @return Number of elements in the data set
         */
        LIB4NEURO_API size_t get_n_elements();

        /**
         * Returns the input dimension
         * @note Not declared const, so it cannot be called on a const DataSet.
         * @return Input dimension
         */
        LIB4NEURO_API size_t get_input_dim();

        /**
         * Returns the output dimension
         * @note Not declared const, so it cannot be called on a const DataSet.
         * @return Output dimension
         */
        LIB4NEURO_API size_t get_output_dim();

        /**
         * Getter for the data structure
         *
         * NOTE(review): presumably the returned pointer refers to the
         * internally stored vector, so it would stay valid only as long as
         * this DataSet lives - confirm in the implementation.
         *
         * @return Pointer to the vector of (input, output) pairs
         */
        LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> *get_data();

        /**
         * Adds a new pair of data to the data set
         *
         * Both vectors are taken by non-const reference; whether they are
         * modified or validated against the stored dimensions is not
         * visible in this header.
         *
         * @param inputs Vector of input data
         * @param outputs Vector of output data corresponding to the input data
         */
        LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);

        //TODO expand method to generate multiple data types - chebyshev etc.
        /**
         * Adds new data with input values equidistantly positioned
         * over the given interval and with a constant output value.
         *
         * Both input and output are 1-dimensional
         *
         * @param lower_bound Lower bound of the input data interval
         * @param upper_bound Upper bound of the input data interval
         * @param size Number of input-output pairs generated
         * @param output Constant output value
         */
        LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);

        /**
         * Adds new data with input values equidistantly positioned
         * over the given intervals; the output values are determined
         * by @p output_func.
         *
         * Input can have arbitrary many dimensions,
         * output can be an arbitrary function
         *
         * @param bounds Odd values are lower bounds and even values are corresponding upper bounds
         *               (NOTE(review): presumably counted from 1, i.e. the first
         *               element is a lower bound - confirm)
         * @param no_elems_in_one_dim NOTE(review): presumably the number of
         *                            generated input values along each
         *                            dimension - confirm
         * @param output_func Function determining output value; takes an
         *                    input vector by non-const reference and returns
         *                    the output vector by value
         */
        LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
                                              std::vector<double> (*output_func)(std::vector<double> &));

        //TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi

        /**
         * Prints the data set.
         *
         * The output destination and format are not visible in this header.
         */
        LIB4NEURO_API void print_data();

        /**
         * Stores the DataSet object to a file.
         *
         * NOTE(review): the original comment said "binary file" although the
         * method is named store_text - confirm which format is actually
         * written.
         *
         * @param file_path Path to the output file (taken by value)
         */
        LIB4NEURO_API void store_text(std::string file_path);

        /**
         * NOTE(review): undocumented in the original; presumably writes the
         * data in text form to an already opened stream - confirm.
         *
         * @param file_path Pointer to an output file stream; despite its
         *                  name this is a std::ofstream, not a path
         */
        LIB4NEURO_API void store_data_text(std::ofstream* file_path);

        /**
         * Stores the data to the text file in a human readable format
         *
         * @param file_path
        LIB4NEURO_API void store_data_text(std::string file_path);
        /**
         * Returns the normalized value of @p val.
         *
         * NOTE(review): presumably uses the attached normalization
         * strategy - confirm in the implementation.
         *
         * @param val Value to be normalized
         * @return Normalized value
         */
        LIB4NEURO_API double get_normalized_value(double val);

        /**
         * Denormalizes the data set
         */
        LIB4NEURO_API void de_normalize();

        /**
         * Stores the de-normalized vector @p d1 into @p d2
         * @param d1 Vector to be de-normalized (taken by non-const reference)
         * @param d2 Vector receiving the de-normalized values
         */
        LIB4NEURO_API void de_normalize_single(std::vector<double> &d1, std::vector<double> &d2);

        /**
         * Stores the @p idx -th input in the vector @p d
         *
         * Whether @p idx is range-checked is not visible in this header.
         *
         * @param d Vector receiving the input values
         * @param idx Index of the requested data element
         */
        LIB4NEURO_API void get_input(std::vector<double> &d, size_t idx);

        /**
         * Stores the @p idx -th output in the vector @p d
         *
         * Whether @p idx is range-checked is not visible in this header.
         *
         * @param d Vector receiving the output values
         * @param idx Index of the requested data element
         */
        LIB4NEURO_API void get_output(std::vector<double> &d, size_t idx);

        /**
         * Getter for the normalization strategy.
         *
         * @return Raw pointer to a NormalizationStrategy.
         *         NOTE(review): presumably a non-owning view of the strategy
         *         held by this data set - confirm.
         */
        LIB4NEURO_API NormalizationStrategy* get_normalization_strategy();

        /**
         * Setter for the normalization strategy.
         *
         * NOTE(review): the class stores its strategy in a shared pointer, so
         * presumably ownership of @p ns is taken over - confirm before
         * deleting the object or passing a stack address.
         *
         * @param ns Pointer to the normalization strategy to be used
         */
        LIB4NEURO_API void set_normalization_strategy(NormalizationStrategy* ns);

        /**
         * NOTE(review): undocumented in the original; presumably tells
         * whether the stored data are currently normalized - confirm.
         * @return Boolean flag
         */
        LIB4NEURO_API bool is_normalized();

        /**
         * NOTE(review): undocumented in the original; presumably returns
         * the maximum input value - confirm.
         * @return Value of type double
         */
        LIB4NEURO_API double get_max_inp_val();

        /**
         * NOTE(review): undocumented in the original; presumably returns
         * the minimum input value - confirm.
         * @return Value of type double
         */
        LIB4NEURO_API double get_min_inp_val();
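        /**
         * NOTE(review): undocumented in the original; judging by the name of
         * the declaration that follows, this presumably returns a randomly
         * selected batch of data pairs - confirm in the implementation.
         *
         * @param max Presumably the maximum number of pairs in the batch - confirm
         * @return Vector of (input, output) pairs, returned by value
         */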
Martin Beseda's avatar
Martin Beseda committed
        LIB4NEURO_API  std::vector<std::pair<std::vector<double>, std::vector<double>>> get_random_data_batch(size_t max);
Martin Beseda's avatar
Martin Beseda committed
#endif //INC_4NEURO_DATASET_H