// DataSet.h (8.25 KiB)
// Authored by Martin Beseda
//
// Created by martin on 7/13/18.
//
#ifndef INC_4NEURO_DATASET_H
#define INC_4NEURO_DATASET_H
#include <iostream>
#include <fstream>
#include <utility>
#include <vector>
#include <string>
#include <functional>
#include <limits>
#include "../settings.h"
#include "../NormalizationStrategy/NormalizationStrategy.h"
namespace lib4neuro {
/**
 * Class representing data, which can be used for training
 * and testing purposes.
 *
 * The data set is stored as a vector of (input vector, output vector)
 * pairs together with the number of stored pairs, the input/output
 * dimensions and a pointer to a normalization strategy.
 *
 * Only declarations are given here; the member functions are defined
 * in the corresponding implementation file.
 */
class DataSet {
private:
/**
 * Number of elements in the data set (0 until set by the implementation)
 */
size_t n_elements = 0;
/**
 * Dimension of the input (0 until set by the implementation)
 */
size_t input_dim = 0;
/**
 * Dimension of the output (0 until set by the implementation)
 */
size_t output_dim = 0;
// Disabled predecessor of max_min_inp_val (two separate scalar members),
// kept for reference:
// /**
// * Maximum input value
// */
// double max_inp_val = //std::numeric_limits<double>::quiet_NaN();
//
// /**
// * Minimum input value
// */
// double min_inp_val = std::numeric_limits<double>::quiet_NaN();
/**
 * Maximum (index 0) and minimum (index 1) input value
 *
 * NOTE(review): the vector is default-constructed (empty) here; it is
 * presumably filled by the implementation before the getters read it -
 * confirm.
 */
std::vector<double> max_min_inp_val; //TODO make more efficiently, than by vector!
/**
 * Stored data in the format of pairs of corresponding
 * input (pair.first) and output (pair.second) vectors
 */
std::vector<std::pair<std::vector<double>, std::vector<double>>> data;
/**
 * Private helper template operating on a vector of vectors.
 *
 * Judging by its name it presumably builds the Cartesian product of the
 * given vectors (one resulting vector per combination) - the definition
 * is not visible in this header, TODO confirm.
 *
 * @tparam T Element type of the vectors
 * @param v Pointer to the vector of vectors to be combined
 * @return Vector of vectors of T
 */
template<class T>
std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>> *v);
/**
 * Normalization strategy associated with this data set.
 *
 * Defaults to a DoubleUnitStrategy object allocated with `new`.
 *
 * NOTE(review): this class declares no destructor, so it is not visible
 * here who releases this allocation or a strategy passed in through
 * a constructor - confirm the ownership rules in the implementation.
 */
//TODO let user choose in the constructor!
NormalizationStrategy* normalization_strategy = new DoubleUnitStrategy;
// Disabled flag, kept for reference (see is_normalized()):
// /**
// *
// */
// bool normalized = false;
public:
/**
 * Struct used to access private properties from
 * the serialization function
 *
 * Only forward-declared here; it is defined elsewhere.
 */
struct access;
/**
 * Constructor for an empty DataSet
 */
LIB4NEURO_API DataSet();
/**
 * Constructor reading data from the file
 * @param file_path Path to the file with stored data set
 */
LIB4NEURO_API DataSet(std::string file_path);
/**
 * Constructor accepting data vector
 * @param data_ptr Pointer to the vector containing data, i.e. pairs of
 *                 corresponding input and output vectors
 * @param ns Pointer to a normalization strategy, nullptr by default
 *           (presumably nullptr keeps the default strategy - confirm)
 */
LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>> *data_ptr,
NormalizationStrategy* ns = nullptr);
/**
 * Creates a new data set with input values equidistantly positioned
 * over the certain interval and the output value
 * being constant
 *
 * Both input and output are 1-dimensional
 *
 * @todo add bounds as vectors for multi-dimensional data-sets
 *
 * @param lower_bound Lower bound of the input data interval
 * @param upper_bound Upper bound of the input data interval
 * @param size Number of input-output pairs generated
 * @param output Constant output value
 * @param ns Pointer to a normalization strategy, nullptr by default
 *           (presumably nullptr keeps the default strategy - confirm)
 */
LIB4NEURO_API DataSet(double lower_bound,
double upper_bound,
unsigned int size,
double output,
NormalizationStrategy* ns = nullptr);
/**
 * Creates a new data set from per-dimension bounds and an output function.
 *
 * The parameters mirror the vector overload of add_isotropic_data();
 * presumably the data are generated the same way - TODO confirm.
 *
 * @param bounds Bounds of the input data intervals (see the vector
 *               overload of add_isotropic_data() for the layout)
 * @param no_elems_in_one_dim Number of elements in one input dimension
 *                            (per the parameter name)
 * @param output_func Function taking an input vector and returning
 *                    the corresponding output vector
 * @param output_dim Dimension of the output
 * @param ns Pointer to a normalization strategy, nullptr by default
 *           (presumably nullptr keeps the default strategy - confirm)
 */
LIB4NEURO_API DataSet(std::vector<double> &bounds,
unsigned int no_elems_in_one_dim,
std::vector<double> (*output_func)(std::vector<double> &),
unsigned int output_dim,
NormalizationStrategy* ns = nullptr);
/**
 * Getter for number of elements
 * @return Number of elements in the data set
 */
LIB4NEURO_API size_t get_n_elements();
/**
 * Returns the input dimension
 * @return Input dimension
 */
LIB4NEURO_API size_t get_input_dim();
/**
 * Return the output dimension
 * @return Output dimension
 */
LIB4NEURO_API size_t get_output_dim();
/**
 * Getter for the data structure
 * @return Pointer to the vector of (input, output) pairs
 *         (presumably the internally stored one, so changes made through
 *         the pointer affect the data set - confirm)
 */
LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> *get_data();
/**
 * Adds a new pair of data to the data set
 * @param inputs Vector of input data
 * @param outputs Vector of output data corresponding to the input data
 */
LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);
//TODO expand method to generate multiple data types - chebyshev etc.
/**
 * Adds a new data with input values equidistantly positioned
 * over the certain interval and the output value
 * being constant
 *
 * Both input and output are 1-dimensional
 *
 * @param lower_bound Lower bound of the input data interval
 * @param upper_bound Upper bound of the input data interval
 * @param size Number of input-output pairs generated
 * @param output Constant output value
 */
LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);
/**
 * Adds a new data with input values equidistantly positioned
 * over the certain intervals and the output value
 * being determined by the given function
 *
 * Input can have arbitrary many dimensions,
 * output can be an arbitrary function
 *
 * @param bounds Odd values are lower bounds and even values are
 *               corresponding upper bounds (presumably counted from 1,
 *               i.e. bounds[0] is the first lower bound - TODO confirm)
 * @param no_elems_in_one_dim Number of elements in one input dimension
 *                            (per the parameter name; the total number of
 *                            generated pairs is not visible here)
 * @param output_func Function determining output value; it takes an input
 *                    vector and returns the corresponding output vector
 */
LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
std::vector<double> (*output_func)(std::vector<double> &));
//TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi
/**
 * Prints the data set
 */
LIB4NEURO_API void print_data();
/**
 * Stores the DataSet object to the file
 *
 * NOTE(review): the original comment described the target as a binary
 * file although the method is named store_text - confirm the actual
 * format in the implementation.
 *
 * @param file_path Path to the output file
 */
LIB4NEURO_API void store_text(std::string file_path);
/**
 * Stream-based overload of store_data_text(); presumably writes the same
 * human readable representation of the data - TODO confirm.
 *
 * @param file_path Pointer to the output file stream (despite its name,
 *                  this parameter is a stream, not a path)
 */
LIB4NEURO_API void store_data_text(std::ofstream* file_path);
/**
 * Stores the data to the text file in a human readable format
 *
 * @param file_path Path to the output file
 */
LIB4NEURO_API void store_data_text(std::string file_path);
/**
 * Normalizes the data set
 */
LIB4NEURO_API void normalize();
/**
 * Stores the de-normalized vector d1 into d2
 * @param d1 Vector to be de-normalized
 * @param d2 Vector receiving the de-normalized values
 */
LIB4NEURO_API void de_normalize_single(std::vector<double> &d1, std::vector<double> &d2);
/**
 * Stores the idx-th input in the vector d
 * @param d Vector receiving the input
 * @param idx Index of the requested input
 */
LIB4NEURO_API void get_input(std::vector<double> &d, size_t idx);
/**
 * Stores the idx-th output in the vector d
 * @param d Vector receiving the output
 * @param idx Index of the requested output
 */
LIB4NEURO_API void get_output(std::vector<double> &d, size_t idx);
/**
 * Getter for the normalization strategy
 * @return Pointer to a normalization strategy (presumably the one stored
 *         in this data set - confirm)
 */
LIB4NEURO_API NormalizationStrategy* get_normalization_strategy();
/**
 * Tells whether the data set is normalized
 *
 * NOTE(review): the dedicated `normalized` flag is commented out in this
 * class, so the information presumably comes from elsewhere (e.g. the
 * normalization strategy) - confirm.
 *
 * @return Presumably true if the data set is normalized, false otherwise
 */
LIB4NEURO_API bool is_normalized();
/**
 * Getter for the maximum input value
 * @return Maximum input value (presumably index 0 of the stored
 *         maximum/minimum vector - confirm)
 */
LIB4NEURO_API double get_max_inp_val();
/**
 * Getter for the minimum input value
 * @return Minimum input value (presumably index 1 of the stored
 *         maximum/minimum vector - confirm)
 */
LIB4NEURO_API double get_min_inp_val();
/**
 * Returns a batch of (input, output) pairs by value.
 *
 * Judging by the name, the pairs are presumably selected randomly from
 * the stored data - TODO confirm the selection rule.
 *
 * @param max Presumably the maximum number of pairs in the batch - confirm
 * @return Vector of (input, output) pairs
 */
LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> get_random_data_batch(size_t max);
};
}
#endif //INC_4NEURO_DATASET_H