/**
 * Declarations of the InvalidDimension exception and the DataSet container.
 */
#ifndef INC_4NEURO_DATASET_H
#define INC_4NEURO_DATASET_H
#include <cstddef>
#include <exception>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <boost/range/size_type.hpp>
#include <boost/serialization/base_object.hpp>
/**
 * Exception type signalling that an input/output dimension
 * was specified incorrectly.
 *
 * Derives publicly from std::runtime_error, so it can be caught
 * either by its own type or through the standard exception hierarchy.
 */
class InvalidDimension : public std::runtime_error {
public:
    /**
     * Creates the exception carrying the general error message.
     */
    InvalidDimension();

    /**
     * Creates the exception carrying a specific error message.
     *
     * @param msg Specific error message
     */
    explicit InvalidDimension(std::string msg);
};
/**
 * Class representing data, which can be used for training
 * and testing purposes.
 *
 * The data are held as pairs of corresponding input and output vectors.
 */
class DataSet {
    friend class boost::serialization::access;

private:
    /**
     * Number of elements in the data set
     */
    size_t n_elements = 0;

    /**
     * Dimension of the input
     */
    unsigned int input_dim = 0;

    /**
     * Dimension of the output
     */
    unsigned int output_dim = 0;

    /**
     * Stored data in the format of pairs of corresponding
     * input and output vectors
     */
    std::vector<std::pair<std::vector<double>, std::vector<double>>> data;

protected:
    /**
     * Serialization function
     *
     * Archive layout, identical for loading and storing:
     *   1. number of elements,
     *   2. input dimension,
     *   3. output dimension,
     *   4. for every element: all input values followed by all output values.
     *
     * @tparam Archive Boost library template
     * @param ar Boost parameter - filled automatically during serialization!
     * @param version Boost parameter - filled automatically during serialization!
     */
    template<class Archive>
    void serialize(Archive & ar, const unsigned int version) {
        if(Archive::is_loading::value) {
            /* LOADING data */
            ar & this->n_elements;

            /* INPUT dimension (archived as size_t) */
            size_t stored_input_dim = 0;
            ar & stored_input_dim;

            /* OUTPUT dimension (archived as size_t) */
            size_t stored_output_dim = 0;
            ar & stored_output_dim;

            /* Keep the members in sync with what was read from the archive */
            this->input_dim = static_cast<unsigned int>(stored_input_dim);
            this->output_dim = static_cast<unsigned int>(stored_output_dim);

            std::vector<std::pair<std::vector<double>, std::vector<double>>> data_tmp;
            double tmp = 0.0;

            for(size_t i = 0; i < this->n_elements; i++) {
                std::vector<double> inputs;
                std::vector<double> outputs;

                /* INPUT vector */
                for(size_t j = 0; j < stored_input_dim; j++) {
                    ar & tmp;
                    inputs.push_back(tmp);
                }

                /* OUTPUT vector */
                for(size_t j = 0; j < stored_output_dim; j++) {
                    ar & tmp;
                    outputs.push_back(tmp);
                }

                /* Append to the data vector */
                data_tmp.emplace_back(std::move(inputs), std::move(outputs));
            }

            this->data = std::move(data_tmp);
        } else {
            /* STORING data */

            /* The loading branch reads the element count first, so it has to be written first */
            ar & this->n_elements;

            //TODO check stored vectors dimensions

            /* Dimensions are taken from the first element; an empty data set
             * falls back to the declared dimensions instead of reading data[0] */
            size_t dim_inp = this->input_dim;
            size_t dim_out = this->output_dim;
            if(!this->data.empty()) {
                dim_inp = std::get<0>(this->data[0]).size();
                dim_out = std::get<1>(this->data[0]).size();
            }

            /* INPUT dimension */
            ar & dim_inp;

            /* OUTPUT dimension */
            ar & dim_out;

            for(const auto& p : this->data) {
                /* Values are copied into mutable locals on purpose: both branches
                 * are compiled for every archive type and a loading archive
                 * cannot bind to a const value */

                /* Input vector */
                for(auto val : std::get<0>(p)) {
                    ar & val;
                }

                /* Output vector */
                for(auto val : std::get<1>(p)) {
                    ar & val;
                }
            }
        }
    }

public:
    /**
     * Constructor reading data from the file
     * @param file_path Path to the file with stored data set
     */
    DataSet(std::string file_path);

    /**
     * Constructor accepting data vector
     * @param data_ptr Pointer to the vector containing data
     */
    DataSet(std::vector<std::pair<std::vector<double>, std::vector<double>>>* data_ptr);

    /**
     * Getter for number of elements
     * @return Number of elements in the data set
     */
    size_t get_n_elements();

    /**
     * Returns the input dimension
     * @return Input dimension
     */
    unsigned int get_input_dim();

    /**
     * Returns the output dimension
     * @return Output dimension
     */
    unsigned int get_output_dim();

    /**
     * Getter for the data structure
     * @return Pointer to a vector of (input, output) pairs
     */
    std::vector<std::pair<std::vector<double>, std::vector<double>>>* get_data();

    /**
     * Adds a new pair of data to the data set
     * @param inputs Vector of input data
     * @param outputs Vector of output data corresponding to the input data
     */
    void add_data_pair(std::vector<double> inputs, std::vector<double> outputs);

    /**
     * Prints the data set
     */
    void print_data();

    /**
     * Stores the DataSet object to a file
     * (the method name suggests a text archive - the implementation is not part of this header)
     * @param file_path Path to the file the data set is written to
     */
    void store_text(std::string file_path);
};