//
// Created by martin on 7/13/18.
//
#ifndef INC_4NEURO_DATASET_H
#define INC_4NEURO_DATASET_H
#include <cstddef>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "../settings.h"
#include "../NormalizationStrategy/NormalizationStrategy.h"

namespace lib4neuro {
/**
* Class representing data, which can be used for training
* and testing purposes.
*/
class DataSet {
private:
/**
* Number of elements in the data set
*/
Martin Beseda
committed
/**
* Dimension of the input
*/
size_t input_dim = 0;
/**
* Dimension of the output
*/
size_t output_dim = 0;
Martin Beseda
committed
// /**
// * Maximum input value
// */
// double max_inp_val = //std::numeric_limits<double>::quiet_NaN();
//
// /**
// * Minimum input value
// */
// double min_inp_val = std::numeric_limits<double>::quiet_NaN();
Martin Beseda
committed

Michal Kravcenko
committed
bool normalized = false;
Martin Beseda
committed
/**
Martin Beseda
committed
* Maximum (index 0) and minimum (index 1) input value
Martin Beseda
committed
*/
Martin Beseda
committed
std::vector<double> max_min_inp_val; //TODO make more efficiently, than by vector!
Martin Beseda
committed
Martin Beseda
committed
/**
* Stored data in the format of pairs of corresponding
* input and output vectors
*/
std::vector<std::pair<std::vector<double>, std::vector<double>>> data;
/**
*
* @tparam T
* @param v
* @return
*/
Martin Beseda
committed
template<class T>
std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>> *v);
Martin Beseda
committed
/**
*
*/
//TODO let user choose in the constructor!
std::shared_ptr<NormalizationStrategy> normalization_strategy;
Martin Beseda
committed
// /**
// *
// */
// bool normalized = false;
Martin Beseda
committed
Martin Beseda
committed
public:
/**
* Struct used to access private properties from
* the serialization function
*/
struct access;
/**
* Constructor for an empty DataSet
*/
LIB4NEURO_API DataSet();
Martin Beseda
committed
Martin Beseda
committed
/**
* Constructor reading data from the file
* @param file_path Path to the file with stored data set
*/
LIB4NEURO_API DataSet(std::string file_path);
/**
* Constructor accepting data vector
* @param data_ptr Pointer to the vector containing data
*/
Martin Beseda
committed
LIB4NEURO_API DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>> *data_ptr,
NormalizationStrategy* ns = nullptr);
Martin Beseda
committed
/**
* Creates a new data set with input values equidistantly positioned
* over the certain interval and the output value
* being constant
*
* Both input and output are 1-dimensional
*
* @todo add bounds as vectors for multi-dimensional data-sets
*
* @param lower_bound Lower bound of the input data interval
* @param upper_bound Upper bound of the input data interval
* @param size Number of input-output pairs generated
* @param output Constant output value
*/
LIB4NEURO_API DataSet(double lower_bound,
double upper_bound,
unsigned int size,
double output,
NormalizationStrategy* ns = nullptr);
Martin Beseda
committed
/**
*
* @param bounds
* @param no_elems_in_one_dim
* @param output_func
* @param output_dim
*/
LIB4NEURO_API DataSet(std::vector<double> &bounds,
unsigned int no_elems_in_one_dim,
std::vector<double> (*output_func)(std::vector<double> &),
unsigned int output_dim,
NormalizationStrategy* ns = nullptr);
Martin Beseda
committed
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/**
* Getter for number of elements
* @return Number of elements in the data set
*/
LIB4NEURO_API size_t get_n_elements();
/**
* Returns the input dimension
* @return Input dimension
*/
LIB4NEURO_API size_t get_input_dim();
/**
* Return the output dimension
* @return Output dimension
*/
LIB4NEURO_API size_t get_output_dim();
/**
* Getter for the data structure
* @return Vector of data
*/
LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> *get_data();
/**
* Adds a new pair of data to the data set
* @param inputs Vector of input data
* @param outputs Vector of output data corresponding to the input data
*/
LIB4NEURO_API void add_data_pair(std::vector<double> &inputs, std::vector<double> &outputs);
//TODO expand method to generate multiple data types - chebyshev etc.
/**
* Adds a new data with input values equidistantly positioned
* over the certain interval and the output value
* being constant
*
* Both input and output are 1-dimensional
*
* @param lower_bound Lower bound of the input data interval
* @param upper_bound Upper bound of the input data interval
* @param size Number of input-output pairs generated
* @param output Constant output value
*/
LIB4NEURO_API void add_isotropic_data(double lower_bound, double upper_bound, unsigned int size, double output);
/**
* Adds a new data with input values equidistantly positioned
* over the certain interval and the output value
* being constant
*
* Input can have arbitrary many dimensions,
* output can be an arbitrary function
*
* @param bounds Odd values are lower bounds and even values are corresponding upper bounds
* @param size Number of input-output pairs generated
* @param output_func Function determining output value
*/
LIB4NEURO_API void add_isotropic_data(std::vector<double> &bounds, unsigned int no_elems_in_one_dim,
std::vector<double> (*output_func)(std::vector<double> &));
//TODO Chebyshev - ch. interpolation points, i-th point = cos(i*alpha) from 0 to pi
/**
* Prints the data set
*/
LIB4NEURO_API void print_data();
/**
* Stores the DataSet object to the binary file
*
*/
LIB4NEURO_API void store_text(std::string file_path);
/**
*
* @param file_path
*/
LIB4NEURO_API void store_data_text(std::ofstream* file_path);
/**
* Stores the data to the text file in a human readable format
*
* @param file_path
Martin Beseda
committed
*/
LIB4NEURO_API void store_data_text(std::string file_path);
Martin Beseda
committed
/**
* Normalizes the data set
Martin Beseda
committed
*/
LIB4NEURO_API void normalize();

Michal Kravcenko
committed
/**
* returns the normalized value of @val
* @param val
* @return
*/
LIB4NEURO_API double get_normalized_value(double val);
/**
* Denormalizes the data set
*/
LIB4NEURO_API void de_normalize();
/**
* stores the de-normalized vector @d1 into @d2
* @param d1
* @param d2
*/
LIB4NEURO_API void de_normalize_single(std::vector<double> &d1, std::vector<double> &d2);
/**
* stores the @idx-th input in the vector @d
* @param d
* @param idx
*/
LIB4NEURO_API void get_input(std::vector<double> &d, size_t idx);
/**
* stores the @idx-th output in the vector @d
* @param d
* @param idx
*/
LIB4NEURO_API void get_output(std::vector<double> &d, size_t idx);
/**
*
* @return
*/
LIB4NEURO_API NormalizationStrategy* get_normalization_strategy();
LIB4NEURO_API void set_normalization_strategy(NormalizationStrategy* ns);
/**
*
* @return
*/
LIB4NEURO_API bool is_normalized();
/**
*
* @return
*/
LIB4NEURO_API double get_max_inp_val();
/**
*
* @return
*/
LIB4NEURO_API double get_min_inp_val();
Martin Beseda
committed
/**
*
* @param max
* @return
*/
LIB4NEURO_API std::vector<std::pair<std::vector<double>, std::vector<double>>> get_random_data_batch(size_t max);
Martin Beseda
committed
};
}