Skip to content
Snippets Groups Projects
DataSet.h 5.78 KiB
Newer Older
  • Learn to ignore specific revisions
  • Martin Beseda's avatar
    Martin Beseda committed
    //
    // Created by martin on 7/13/18.
    //
    
    #ifndef INC_4NEURO_DATASET_H
    #define INC_4NEURO_DATASET_H
    
    
    #include <iostream>
    #include <fstream>
    
    #include <utility>
    #include <vector>
    
    #include <functional>
    
    #include <boost/serialization/base_object.hpp>
    #include <boost/range/size_type.hpp>
    #include <boost/serialization/vector.hpp>
    #include <boost/serialization/utility.hpp>
    #include <boost/archive/text_oarchive.hpp>
    #include <boost/archive/text_iarchive.hpp>
    
    
    /**
     * Exception type signalling that an input or output dimension
     * was specified incorrectly.
     *
     * Publicly derives from std::runtime_error, so handlers written for
     * std::runtime_error also catch it.
     *
     * NOTE(review): the include list visible in this header does not name
     * <stdexcept> or <string> directly - confirm that they are reached
     * transitively, or include them explicitly.
     */
    class InvalidDimension : public std::runtime_error {
    public:

        /**
         * Creates the exception carrying the general error message
         */
        InvalidDimension();

        /**
         * Creates the exception carrying a specific error message
         * @param msg Specific error message
         */
        explicit InvalidDimension(std::string msg);
    };
    
    
    /**
     * Class representing data, which can be used for training
     * and testing purposes.
     */
    class DataSet {
        friend class boost::serialization::access;
    
    private:
        /**
         * Number of elements in the data set
         *
         * NOTE(review): presumably kept equal to data.size() by the
         * constructors and the add_* methods - confirm in the implementation.
         */
        size_t n_elements;
    
    
        /**
         * Stored data in the format of pairs of corresponding
         * input and output vectors (presumably the input vector is
         * `first` and the output vector is `second` - confirm)
         */
        std::vector<std::pair<std::vector<double>, std::vector<double>>> data;
    
    
        /**
         * Private helper template (only declared here).
         *
         * NOTE(review): judging by the name, this presumably builds the
         * Cartesian product of the vectors pointed to by v, i.e. one
         * resulting vector per combination of elements - confirm against
         * the implementation.
         *
         * @tparam T Element type of the vectors
         * @param v Pointer to the collection of input vectors
         * @return Vector of vectors of T
         */
        template <class T>
        std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>>* v);
    
    
    protected:
        /**
         * Boost.Serialization hook of the data set.
         *
         * Hands the element count, the input and output dimensions and
         * the stored data to the archive, in exactly this order, using
         * the archive's operator&.
         *
         * @tparam Archive Archive type - supplied by Boost
         * @param ar Archive the members are passed to - filled automatically during serialization!
         * @param version Version parameter supplied by Boost - filled automatically, not used in the body
         */
        template<class Archive>
        void serialize(Archive & ar, const unsigned int version){
            // operator& yields the archive again, so the members can be chained
            ar & this->n_elements
               & this->input_dim
               & this->output_dim
               & this->data;
        }
    
    public:
    
        /**
         * Constructor reading data from the file
         * @param file_path Path to the file with stored data set
         */
    
    
        /**
         * Constructor accepting data vector
         *
         * NOTE(review): it is not visible from this declaration whether the
         * pointed-to vector is copied or only referenced - confirm the
         * lifetime requirements in the implementation.
         * NOTE(review): this single-argument constructor is not marked
         * `explicit`, so it also permits implicit conversion from the
         * pointer type.
         *
         * @param data_ptr Pointer to the vector containing the input-output pairs
         */

        LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>>* data_ptr);
    
        /**
         * Creates a new data set with input values equidistantly positioned
         * over a given interval and with a constant output value
         *
         * Both input and output are 1-dimensional
         *
         * @todo add bounds as vectors for multi-dimensional data-sets
         *
         * @param lower_bound Lower bound of the input data interval
         * @param upper_bound Upper bound of the input data interval
         * @param size Number of input-output pairs generated
         * @param output Constant output value
         */

        LIB4NEURO_API DataSet(double lower_bound, double upper_bound, unsigned int size, double output);
    
        /**
         * Creates a new data set from a vector of bounds and an output
         * function
         *
         * NOTE(review): the parameter meanings below are inferred from the
         * documentation of the add_isotropic_data overload taking the same
         * first three arguments - confirm against the implementation.
         *
         * @param bounds Presumably alternating lower and upper bounds of the input interval in each dimension
         * @param no_elems_in_one_dim Presumably the number of points generated along each input dimension
         * @param output_func Function returning the output vector for a given input vector
         * @param output_dim Presumably the dimension of the vectors returned by output_func
         */

        LIB4NEURO_API DataSet(std::vector<double> &bounds, unsigned int no_elems_in_one_dim, std::vector<double> (*output_func)(std::vector<double>&), unsigned int output_dim);
    
        /**
         * Getter for number of elements
         * @return Number of elements in the data set
         */
    
        /**
         * Returns the input dimension
         * @return Input dimension
         */
    
        /**
         * Getter for the data structure
         * @return Pointer to a vector of input-output pairs (presumably
         *         the internally stored data rather than a copy - confirm)
         */

        LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>>* get_data();
    
    
        /**
         * Adds a new pair of data to the data set
         *
         * NOTE(review): both vectors are taken by non-const reference;
         * whether they are modified is not visible from this declaration.
         *
         * @param inputs Vector of input data
         * @param outputs Vector of output data corresponding to the input data
         */

        LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);
    
        //TODO expand method to generate multiple data types - chebyshev etc.
        /**
         * Adds new data with input values equidistantly positioned
         * over a given interval and with a constant output value
         *
         * Both input and output are 1-dimensional
         *
         * @param lower_bound Lower bound of the input data interval
         * @param upper_bound Upper bound of the input data interval
         * @param size Number of input-output pairs generated
         * @param output Constant output value
         */

        LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);
    
        /**
         * Adds new data with input values equidistantly positioned
         * over the given intervals and with output values determined
         * by a function
         *
         * Input can have arbitrarily many dimensions,
         * output can be an arbitrary function
         *
         * @param bounds Odd values are lower bounds and even values are corresponding upper bounds
         * @param no_elems_in_one_dim Presumably the number of points generated along each
         *                            input dimension (previously documented under the
         *                            name `size` as "Number of input-output pairs generated") - confirm
         * @param output_func Function determining output value
         */

        LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim, std::vector<double> (*output_func)(std::vector<double>&));
    
        //TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi
    
    
        /**
         * Prints the data set
         */
    
    
        /**
         * Stores the DataSet object to a file
         *
         * NOTE(review): this comment used to say "binary file"; the method
         * name and the boost text archive headers included by this file
         * suggest a text archive is written instead - confirm against the
         * implementation.
         *
         * @param file_path Path to the file the data set is stored in
         */

        LIB4NEURO_API void store_text(std::string &file_path);
    
    Martin Beseda's avatar
    Martin Beseda committed
    #endif //INC_4NEURO_DATASET_H