// // Created by martin on 7/13/18. // #ifndef INC_4NEURO_DATASET_H #define INC_4NEURO_DATASET_H #include <iostream> #include <fstream> #include <utility> #include <vector> #include <string> #include <functional> #include <limits> #include "../settings.h" #include "../Exception/Exceptions.h" #include "../NormalizationStrategy/NormalizationStrategy.h" namespace lib4neuro { /** * Class representing data, which can be used for training * and testing purposes. */ class DataSet { private: /** * Number of elements in the data set */ size_t n_elements = 0; /** * Dimension of the input */ size_t input_dim = 0; /** * Dimension of the output */ size_t output_dim = 0; // /** // * Maximum input value // */ // double max_inp_val = //std::numeric_limits<double>::quiet_NaN(); // // /** // * Minimum input value // */ // double min_inp_val = std::numeric_limits<double>::quiet_NaN(); /** * Maximum (index 0) and minimum (index 1) input value */ std::vector<double> max_min_inp_val; //TODO make more efficiently, than by vector! /** * Stored data in the format of pairs of corresponding * input and output vectors */ std::vector<std::pair<std::vector<double>, std::vector<double>>> data; /** * * @tparam T * @param v * @return */ template<class T> std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>> *v); /** * */ //TODO let user choose in the constructor! NormalizationStrategy* normalization_strategy = nullptr; //new DoubleUnitStrategy; // /** // * // */ // bool normalized = false; public: /** * Struct used to access private properties from * the serialization function */ struct access; /** * Constructor for an empty DataSet */ LIB4NEURO_API DataSet(); /** * Constructor reading data from the file * @param file_path Path to the file with stored data set */ LIB4NEURO_API DataSet(std::string file_path); /** * Constructor accepting data vector * @param data_ptr Pointer to the vector containing data */ LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>> *data_ptr, NormalizationStrategy* ns = nullptr); /** * Creates a new data set with input values equidistantly positioned * over the certain interval and the output value * being constant * * Both input and output are 1-dimensional * * @todo add bounds as vectors for multi-dimensional data-sets * * @param lower_bound Lower bound of the input data interval * @param upper_bound Upper bound of the input data interval * @param size Number of input-output pairs generated * @param output Constant output value */ LIB4NEURO_API DataSet(double lower_bound, double upper_bound, unsigned int size, double output, NormalizationStrategy* ns = nullptr); /** * * @param bounds * @param no_elems_in_one_dim * @param output_func * @param output_dim */ LIB4NEURO_API DataSet(std::vector<double> &bounds, unsigned int no_elems_in_one_dim, std::vector<double> (*output_func)(std::vector<double> &), unsigned int output_dim, NormalizationStrategy* ns = nullptr); /** * Getter for number of elements * @return Number of elements in the data set */ LIB4NEURO_API size_t get_n_elements(); /** * Returns the input dimension * @return Input dimension */ LIB4NEURO_API size_t get_input_dim(); /** * Return the output dimension * @return Output dimension */ LIB4NEURO_API size_t get_output_dim(); /** * Getter for the data structure * @return Vector of data */ LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> *get_data(); /** * Adds a new pair of data to the data set * @param inputs Vector of input data * @param outputs Vector of output data corresponding to the input data */ LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs); //TODO expand method to generate multiple data types - chebyshev etc. /** * Adds a new data with input values equidistantly positioned * over the certain interval and the output value * being constant * * Both input and output are 1-dimensional * * @param lower_bound Lower bound of the input data interval * @param upper_bound Upper bound of the input data interval * @param size Number of input-output pairs generated * @param output Constant output value */ LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output); /** * Adds a new data with input values equidistantly positioned * over the certain interval and the output value * being constant * * Input can have arbitrary many dimensions, * output can be an arbitrary function * * @param bounds Odd values are lower bounds and even values are corresponding upper bounds * @param size Number of input-output pairs generated * @param output_func Function determining output value */ LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim, std::vector<double> (*output_func)(std::vector<double> &)); //TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi /** * Prints the data set */ LIB4NEURO_API void print_data(); /** * Stores the DataSet object to the binary file */ LIB4NEURO_API void store_text(std::string &file_path); /** * Normalizes the data set */ LIB4NEURO_API void normalize(); /** * stores the de-normalized vector @d1 into @d2 * @param d1 * @param d2 */ LIB4NEURO_API void de_normalize_single(std::vector<double> &d1, std::vector<double> &d2); /** * stores the @idx-th input in the vector @d * @param d * @param idx */ LIB4NEURO_API void get_input(std::vector<double> &d, size_t idx); /** * stores the @idx-th output in the vector @d * @param d * @param idx */ LIB4NEURO_API void get_output(std::vector<double> &d, size_t idx); /** * * @return */ LIB4NEURO_API NormalizationStrategy* get_normalization_strategy(); /** * * @return */ LIB4NEURO_API bool is_normalized(); /** * * @return */ LIB4NEURO_API double get_max_inp_val(); /** * * @return */ LIB4NEURO_API double get_min_inp_val(); LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> get_random_data_batch(size_t max); }; } #endif //INC_4NEURO_DATASET_H