// DataSet.h
//
// Created by martin on 7/13/18.
//

#ifndef INC_4NEURO_DATASET_H
#define INC_4NEURO_DATASET_H

#include <cstddef>
#include <exception>
#include <fstream>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "../settings.h"

/**
 * Exception type signalling that an input/output dimension
 * was specified incorrectly.
 */
class InvalidDimension : public std::runtime_error {
public:
    /**
     * Creates the exception carrying the general error message.
     */
    InvalidDimension();

    /**
     * Creates the exception carrying a caller-supplied error message.
     * @param msg Specific error message
     */
    explicit InvalidDimension(std::string msg);
};

/**
 * Class representing data, which can be used for training
 * and testing purposes.
 *
 * The data are kept as a sequence of (input vector, output vector) pairs.
 * All member functions are only declared here; their definitions live in
 * a separate translation unit.
 */
class DataSet {
//    friend class boost::serialization::access;

private:

    /**
     * Number of elements in the data set.
     *
     * Zero-initialized, like the dimension members below, so that it never
     * holds an indeterminate value should a constructor not assign it.
     */
    size_t n_elements = 0;

    /**
     * Dimension of the input
     */
    size_t input_dim = 0;

    /**
     * Dimension of the output
     */
    size_t output_dim = 0;

    /**
     * Stored data in the format of pairs of corresponding
     * input and output vectors
     */
    std::vector<std::pair<std::vector<double>, std::vector<double>>> data;

    /**
     * Private helper operating on a collection of vectors.
     *
     * NOTE(review): judging by the name it builds the Cartesian product of
     * the vectors in `v` - confirm against the definition.
     *
     * @tparam T Element type of the vectors
     * @param v Pointer to the vector of vectors to combine
     * @return Vector of vectors of `T`
     */
    template <class T>
    std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>>* v);

//protected:
//    /**
//     * Serialization function
//     * @tparam Archive Boost library template
//     * @param ar Boost parameter - filled automatically during serialization!
//     * @param version Boost parameter - filled automatically during serialization!
//     */
//    template<class Archive>
//    void serialize(Archive & ar, const unsigned int version){
//        ar & this->n_elements;
//        ar & this->input_dim;
//        ar & this->output_dim;
//        ar & this->data;
//    };

public:

    /**
     * Forward declaration of a nested type; its definition is not part of
     * this header.
     *
     * NOTE(review): presumably related to (de)serialization - confirm.
     */
    struct access;

    /**
     * Constructor reading data from the file
     * @param file_path Path to the file with stored data set
     */
    LIB4NEURO_API DataSet(std::string file_path);

    /**
     * Constructor accepting data vector
     * @param data_ptr Pointer to the vector containing data
     */
    LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>>* data_ptr);

    /**
     * Creates a new data set with input values equidistantly positioned
     * over the certain interval and the output value
     * being constant
     *
     * Both input and output are 1-dimensional
     *
     * @todo add bounds as vectors for multi-dimensional data-sets
     *
     * @param lower_bound Lower bound of the input data interval
     * @param upper_bound Upper bound of the input data interval
     * @param size Number of input-output pairs generated
     * @param output Constant output value
     */
    LIB4NEURO_API DataSet(double lower_bound, double upper_bound, unsigned int size, double output);

    /**
     * Creates a new data set from interval bounds and an output function.
     *
     * NOTE(review): presumably the constructor counterpart of
     * add_isotropic_data(bounds, no_elems_in_one_dim, output_func) -
     * confirm against the definition.
     *
     * @param bounds Interval bounds of the input data (see the matching
     *               add_isotropic_data overload for the described layout)
     * @param no_elems_in_one_dim Number of elements generated in one dimension
     * @param output_func Function determining the output vector for a given input vector
     * @param output_dim Dimension of the output
     */
    LIB4NEURO_API DataSet(std::vector<double> &bounds, unsigned int no_elems_in_one_dim, std::vector<double> (*output_func)(std::vector<double>&), unsigned int output_dim);

    /**
     * Getter for number of elements
     * @return Number of elements in the data set
     */
    LIB4NEURO_API size_t get_n_elements();

    /**
     * Returns the input dimension
     * @return Input dimension
     */
    LIB4NEURO_API size_t get_input_dim();


    /**
     * Return the output dimension
     * @return Output dimension
     */
    LIB4NEURO_API size_t get_output_dim();

    /**
     * Getter for the data structure
     * @return Pointer to the vector of (input, output) data pairs
     */
    LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>>* get_data();

    /**
     * Adds a new pair of data to the data set
     * @param inputs Vector of input data
     * @param outputs Vector of output data corresponding to the input data
     */
    LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);

    //TODO expand method to generate multiple data types - chebyshev etc.
    /**
     * Adds a new data with input values equidistantly positioned
     * over the certain interval and the output value
     * being constant
     *
     * Both input and output are 1-dimensional
     *
     * @param lower_bound Lower bound of the input data interval
     * @param upper_bound Upper bound of the input data interval
     * @param size Number of input-output pairs generated
     * @param output Constant output value
     */
    LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);

    /**
     * Adds a new data with input values equidistantly positioned
     * over the given intervals and the output value
     * determined by a function
     *
     * Input can have arbitrary many dimensions,
     * output can be an arbitrary function
     *
     * @param bounds Interval bounds; as originally documented, odd-positioned
     *               values are lower bounds and even-positioned values are the
     *               corresponding upper bounds (presumably
     *               {lower_1, upper_1, lower_2, upper_2, ...} - confirm)
     * @param no_elems_in_one_dim Number of elements generated in one dimension
     * @param output_func Function determining output value
     */
    LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim, std::vector<double> (*output_func)(std::vector<double>&));

    //TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi

    /**
     * Prints the data set
     */
    LIB4NEURO_API void print_data();

    /**
     * Stores the DataSet object to a file.
     *
     * NOTE(review): the previous comment said "binary file", while the method
     * name suggests a text format - confirm against the definition.
     *
     * @param file_path Path of the file the data set is stored to
     */
    LIB4NEURO_API void store_text(std::string &file_path);
};

#endif //INC_4NEURO_DATASET_H