//
// Created by martin on 7/13/18.
//

#ifndef INC_4NEURO_DATASET_H
#define INC_4NEURO_DATASET_H

#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "../Exception/Exceptions.h"

namespace lib4neuro {
    /**
     * Class representing data, which can be used for training
     * and testing purposes.
     *
     * The data are held as a sequence of (input vector, output vector)
     * pairs, see the `data` member.
     */
    class DataSet {

    private:

        /**
         * Number of elements in the data set
         *
         * Zero-initialized so that it is never read in an indeterminate
         * state, consistently with input_dim and output_dim.
         */
        size_t n_elements = 0;

        /**
         * Dimension of the input
         */
        size_t input_dim = 0;

        /**
         * Dimension of the output
         */
        size_t output_dim = 0;

        /**
         * Stored data in the format of pairs of corresponding
         * input and output vectors
         */
        std::vector<std::pair<std::vector<double>, std::vector<double>>> data;

        /**
         * Private helper declared here and defined elsewhere.
         *
         * NOTE(review): judging by the name it builds the Cartesian product
         * of the vectors in *v (presumably to generate grid points for
         * add_isotropic_data) - confirm against the definition.
         *
         * @tparam T Element type of the combined vectors
         * @param v Pointer to the vector of vectors to be combined
         * @return Vector of vectors of T
         */
        template<class T>
        std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>> *v);

    public:

        /**
         * Struct used to access private properties from
         * the serialization function
         */
        struct access;

        /**
         * Constructor reading data from the file
         * @param file_path Path to the file with stored data set
         */
        LIB4NEURO_API DataSet(std::string file_path);

        /**
         * Default constructor
         *
         * The in-class initializers set the element count and both
         * dimensions to zero; anything beyond that is up to the
         * out-of-line definition.
         */
        LIB4NEURO_API DataSet();

        /**
         * Constructor accepting data vector
         *
         * NOTE(review): whether the pointed-to vector is copied or merely
         * referenced is decided by the definition - confirm before relying
         * on the lifetime of *data_ptr.
         *
         * @param data_ptr Pointer to the vector containing data
         */
        LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>> *data_ptr);

        /**
         * Creates a new data set with input values equidistantly positioned
         * over the certain interval and the output value
         * being constant
         *
         * Both input and output are 1-dimensional
         *
         * @todo add bounds as vectors for multi-dimensional data-sets
         *
         * @param lower_bound Lower bound of the input data interval
         * @param upper_bound Upper bound of the input data interval
         * @param size Number of input-output pairs generated
         * @param output Constant output value
         */
        LIB4NEURO_API DataSet(double lower_bound, double upper_bound, unsigned int size, double output);

        /**
         * Creates a new data set from interval bounds and an output function
         *
         * NOTE(review): presumably the multi-dimensional counterpart of the
         * constructor above, using the same bounds layout as
         * add_isotropic_data(bounds, no_elems_in_one_dim, output_func) -
         * confirm against the definition.
         *
         * @param bounds Bounds of the input data intervals
         * @param no_elems_in_one_dim Number of elements per one input dimension
         * @param output_func Function determining the output vector for an input vector
         * @param output_dim Dimension of the output
         */
        LIB4NEURO_API DataSet(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
                              std::vector<double> (*output_func)(std::vector<double> &), unsigned int output_dim);

        /**
         * Getter for number of elements
         * @return Number of elements in the data set
         */
        LIB4NEURO_API size_t get_n_elements();

        /**
         * Returns the input dimension
         * @return Input dimension
         */
        LIB4NEURO_API size_t get_input_dim();

        /**
         * Returns the output dimension
         * @return Output dimension
         */
        LIB4NEURO_API size_t get_output_dim();

        /**
         * Getter for the data structure
         * @return Pointer to the vector of (input, output) pairs
         */
        LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> *get_data();

        /**
         * Adds a new pair of data to the data set
         * @param inputs Vector of input data
         * @param outputs Vector of output data corresponding to the input data
         */
        LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);

        //TODO expand method to generate multiple data types - chebyshev etc.
        /**
         * Adds a new data with input values equidistantly positioned
         * over the certain interval and the output value
         * being constant
         *
         * Both input and output are 1-dimensional
         *
         * @param lower_bound Lower bound of the input data interval
         * @param upper_bound Upper bound of the input data interval
         * @param size Number of input-output pairs generated
         * @param output Constant output value
         */
        LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);

        /**
         * Adds a new data with input values equidistantly positioned
         * over the certain intervals and the output value
         * determined by the given function
         *
         * Input can have arbitrary many dimensions,
         * output can be an arbitrary function
         *
         * @param bounds Odd values are lower bounds and even values are corresponding upper bounds
         *               (NOTE(review): presumably counted from 1, i.e. bounds[0] is the first
         *               lower bound - confirm against the definition)
         * @param no_elems_in_one_dim Number of elements per one input dimension
         * @param output_func Function determining output value
         */
        LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
                                              std::vector<double> (*output_func)(std::vector<double> &));

        //TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi

        /**
         * Prints the data set
         */
        LIB4NEURO_API void print_data();

        /**
         * Stores the DataSet object to the file
         *
         * NOTE(review): the previous comment said "binary file" while the
         * method is named store_text; the actual format is determined by
         * the definition - confirm.
         *
         * @param file_path Path to the output file
         */
        LIB4NEURO_API void store_text(std::string &file_path);
    };
}
#endif //INC_4NEURO_DATASET_H