Skip to content
Snippets Groups Projects
DataSet.h 8.5 KiB
Newer Older
  • Learn to ignore specific revisions
  • Martin Beseda's avatar
    Martin Beseda committed
    //
    // Created by martin on 7/13/18.
    //
    
    #ifndef INC_4NEURO_DATASET_H
    #define INC_4NEURO_DATASET_H
    
    
    #include <iostream>
    #include <fstream>
    
    #include <utility>
    #include <vector>
    
    #include <functional>
    
    #include <boost/random/mersenne_twister.hpp>
    #include <boost/random/uniform_int_distribution.hpp>
    #include <ctime>
    
    #include "../NormalizationStrategy/NormalizationStrategy.h"
    
    namespace lib4neuro {
        /**
         * Class representing data, which can be used for training
         * and testing purposes.
         */
        class DataSet {
    
        private:
    
    
            /**
             * Element counter of the data set, initialised to 0.
             * NOTE(review): presumably the value reported by get_n_elements() -
             * confirm against the implementation.
             */
            size_t n_elements = 0;
    
    
            /**
             * Dimension of the input (initialised to 0)
             */
            size_t input_dim = 0;
    
            /**
             * Dimension of the output (initialised to 0)
             */
            size_t output_dim = 0;
    
    
    //        /**
    //         * Maximum input value
    //         */
    //        double max_inp_val = //std::numeric_limits<double>::quiet_NaN();
    //
    //        /**
    //         * Minimum input value
    //         */
    //        double min_inp_val = std::numeric_limits<double>::quiet_NaN();
    
            /**
             * Maximum (index 0) and minimum (index 1) input value
             */
    
            std::vector<double> max_min_inp_val;  // TODO: store this more efficiently than in a vector
    
            /**
             * Stored data: a vector of pairs, each pair holding an input
             * vector and its corresponding output vector
             */
            std::vector<std::pair<std::vector<double>, std::vector<double>>> data;
    
    
            /**
             * Private helper template; only its declaration is visible in this header.
             * NOTE(review): judging by the name, presumably builds the Cartesian
             * product of the vectors pointed to by v - confirm against the definition.
             *
             * @tparam T Element type of the inner vectors
             * @param v Pointer to a (const) vector of vectors of T
             * @return Vector of vectors of T, returned by value
             */
            template<class T>
            std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>> *v);
    
    
            //TODO let user choose in the constructor!

            /**
             * Pointer to the normalization strategy of this data set; its default
             * member initializer allocates a DoubleUnitStrategy with `new`.
             * NOTE(review): no destructor is visible in this part of the header -
             * confirm where (and whether) this allocation is released.
             */
            NormalizationStrategy* normalization_strategy = new DoubleUnitStrategy;
    
    //        /**
    //         *
    //         */
    //        bool normalized = false;
    
        public:
    
            /**
             * Forward declaration of the struct used to access private
             * properties from the serialization function
             */
            struct access;
    
    
            /**
             * Default constructor creating an empty DataSet
             */
            LIB4NEURO_API DataSet();
    
            /**
             * Constructor reading the data set from a file
             * @param file_path Path to the file with the stored data set (taken by value)
             */
            LIB4NEURO_API DataSet(std::string file_path);
    
            /**
             * Constructor accepting data vector
             * @param data_ptr Pointer to the vector containing data
             *                 (pairs of input and output vectors)
             * @param ns Pointer to a normalization strategy; defaults to nullptr.
             *           How a nullptr is handled is decided by the implementation,
             *           which is not visible in this header.
             */

            LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>> *data_ptr,

                                  NormalizationStrategy* ns = nullptr);
    
    
            /**
             * Creates a new data set with input values equidistantly positioned
             * over the certain interval and the output value
             * being constant
             *
             * Both input and output are 1-dimensional
             *
             * @todo add bounds as vectors for multi-dimensional data-sets
             *
             * @param lower_bound Lower bound of the input data interval
             * @param upper_bound Upper bound of the input data interval
             * @param size Number of input-output pairs generated
             * @param output Constant output value
             * @param ns Pointer to a normalization strategy; defaults to nullptr.
             *           How a nullptr is handled is decided by the implementation,
             *           which is not visible in this header.
             */

            LIB4NEURO_API DataSet(double lower_bound,
                                  double upper_bound,
                                  unsigned int size,
                                  double output,
                                  NormalizationStrategy* ns = nullptr);
    
    
            /**
             * Constructor taking a vector of bounds, a per-dimension element count,
             * an output function and an output dimension.
             *
             * NOTE(review): the original comment left all parameters undescribed; the
             * descriptions marked "presumably" are inferred from the parameter names
             * and from the add_isotropic_data() overload with a similar signature -
             * confirm against the implementation.
             *
             * @param bounds Vector of bounds (presumably lower/upper bound pairs of the input intervals)
             * @param no_elems_in_one_dim Presumably the number of elements generated along one dimension
             * @param output_func Pointer to a function taking a vector of doubles by reference
             *                    and returning a vector of doubles
             * @param output_dim Presumably the dimension of the output vectors
             * @param ns Pointer to a normalization strategy; defaults to nullptr
             */

            LIB4NEURO_API DataSet(std::vector<double> &bounds,
                                  unsigned int no_elems_in_one_dim,
                                  std::vector<double> (*output_func)(std::vector<double> &),
                                  unsigned int output_dim,
                                  NormalizationStrategy* ns = nullptr);
    
    
            /**
             * Getter for the number of elements
             * @return Number of elements in the data set
             */
            LIB4NEURO_API size_t get_n_elements();
    
            /**
             * Getter for the input dimension
             * @return Input dimension
             */
            LIB4NEURO_API size_t get_input_dim();
    
            /**
             * Getter for the output dimension
             * @return Output dimension
             */
            LIB4NEURO_API size_t get_output_dim();
    
            /**
             * Getter for the data structure
             * @return Pointer to a vector of (input vector, output vector) pairs
             */
            LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> *get_data();
    
            /**
             * Adds a new input-output pair to the data set
             * @param inputs Vector of input data (passed by non-const reference)
             * @param outputs Vector of output data corresponding to the input data
             *                (passed by non-const reference)
             */
            LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);
    
            //TODO expand method to generate multiple data types - chebyshev etc.
            /**
             * Adds new data with input values equidistantly positioned
             * over a given interval and with a constant output value
             *
             * Both input and output are 1-dimensional
             *
             * @param lower_bound Lower bound of the input data interval
             * @param upper_bound Upper bound of the input data interval
             * @param size Number of input-output pairs generated
             * @param output Constant output value
             */
            LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);
    
            /**
             * Adds new data with input values equidistantly positioned
             * over the given intervals and with output values determined
             * by output_func
             *
             * Input can have arbitrarily many dimensions,
             * output can be an arbitrary function
             *
             * @param bounds Odd values are lower bounds and even values are corresponding upper bounds
             *               (NOTE(review): "odd"/"even" presumably counts positions starting from 1,
             *               i.e. lower bound first - confirm against the implementation)
             * @param no_elems_in_one_dim Presumably the number of elements generated along one
             *               dimension (the original comment described a non-existent parameter
             *               "size" as "Number of input-output pairs generated") - TODO confirm
             * @param output_func Function determining output value; takes a vector of doubles
             *               by reference and returns a vector of doubles
             */
            LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
                                                  std::vector<double> (*output_func)(std::vector<double> &));
    
            //TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi
    
            /**
             * Prints the data set
             * (the output destination is decided by the implementation, not visible here)
             */
            LIB4NEURO_API void print_data();
    
            /**
             * Stores the DataSet object to a file
             *
             * NOTE(review): the original comment said "binary file" while the method
             * name says "text" - confirm the actual output format.
             *
             * @param file_path Path to the output file (taken by value)
             */
            LIB4NEURO_API void store_text(std::string file_path);
    
    
            /**
             * Overload of store_data_text taking an output file stream.
             * NOTE(review): going by the name, presumably writes the data to the
             * given stream in text form - confirm against the implementation.
             *
             * @param file_path Pointer to an output file stream (despite its name,
             *                  this parameter is a std::ofstream*, not a path)
             */
            LIB4NEURO_API void store_data_text(std::ofstream* file_path);
    
    
            /**
             * Stores the data to the text file in a human readable format
             *
             * @param file_path Path to the output text file
             */
            LIB4NEURO_API void store_data_text(std::string file_path);
    
            /**
             * De-normalizes the data set
             */
            LIB4NEURO_API void de_normalize();
    
    
            /**
             * Stores the de-normalized vector d1 into d2
             * @param d1 Source vector to be de-normalized
             * @param d2 Destination vector receiving the de-normalized values
             */
            LIB4NEURO_API void de_normalize_single(std::vector<double> &d1, std::vector<double> &d2);
    
            /**
             * Stores the idx-th input in the vector d
             * @param d Destination vector receiving the input
             * @param idx Index of the requested input
             */
            LIB4NEURO_API void get_input(std::vector<double> &d, size_t idx);
    
            /**
             * Stores the idx-th output in the vector d
             * @param d Destination vector receiving the output
             * @param idx Index of the requested output
             */
            LIB4NEURO_API void get_output(std::vector<double> &d, size_t idx);
    
    
            /**
             * Getter for the normalization strategy
             * @return Pointer to a NormalizationStrategy (presumably the one held
             *         by this data set - TODO confirm)
             */
            LIB4NEURO_API NormalizationStrategy* get_normalization_strategy();
    
    
            /**
             * Query of the normalization state
             * @return Boolean flag; presumably true when the data set is currently
             *         normalized - TODO confirm against the implementation
             */
            LIB4NEURO_API bool is_normalized();
    
    
            /**
             * Getter for the maximum input value
             * @return Presumably the maximum input value of the data set - TODO confirm
             */
            LIB4NEURO_API double get_max_inp_val();
    
            /**
             * Getter for the minimum input value
             * @return Presumably the minimum input value of the data set - TODO confirm
             */
            LIB4NEURO_API double get_min_inp_val();
    
            /**
             * Returns a batch of (input vector, output vector) pairs by value.
             * NOTE(review): going by the name, the batch is presumably selected at
             * random from the stored data - confirm against the implementation.
             *
             * @param max Presumably the upper limit on the number of pairs returned - TODO confirm
             * @return Vector of (input vector, output vector) pairs
             */
    	    LIB4NEURO_API  std::vector<std::pair<std::vector<double>, std::vector<double>>> get_random_data_batch(size_t max);
    
    Martin Beseda's avatar
    Martin Beseda committed
    #endif //INC_4NEURO_DATASET_H