// // Created by martin on 14.11.18. // #include <string> #include <fstream> #include <sstream> #include <experimental/filesystem> #include <regex> #include <algorithm> #include <memory> #include <boost/lexical_cast.hpp> #include <boost/algorithm/string/erase.hpp> #include "CSVReader.h" #include "exceptions.h" namespace lib4neuro { CSVReader::CSVReader(std::string file_path, std::string delimiter, bool ignore_first_line) { if(!std::experimental::filesystem::exists(file_path)) { THROW_RUNTIME_ERROR("The file path \'" + file_path + "\' specified in CSVReader does not exist!"); } this->file_path = file_path; this->delimiter = delimiter; this->ignore_first_line = ignore_first_line; this->header_included = ignore_first_line; this->data = std::make_unique<std::vector<std::vector<std::string>>>(); } void CSVReader::read() { std::ifstream ifs(this->file_path); std::string line; if(this->ignore_first_line) { std::getline(ifs, line); } /* Read single line from the file */ while(std::getline(ifs, line)) { /* Ignore empty line */ if(line == "") { continue; } /* Separate elements of the line according to the delimiter */ size_t last = 0; size_t next = 0; std::vector<std::string> separated_line; while ((next = line.find(this->delimiter, last)) != std::string::npos) { separated_line.emplace_back(line.substr(last, next - last)); last = next + 1; } separated_line.emplace_back(line.substr(last)); /* Store the elements from the line to the vector with data */ this->data->emplace_back(separated_line); } ifs.close(); } std::unique_ptr<std::vector<std::vector<std::string>>>* CSVReader::get_data() { return &this->data; } void CSVReader::print_data() { for(auto line : *this->data) { for(auto e : line) { std::cout << e << " "; } std::cout << std::endl; } } std::shared_ptr<DataSet> CSVReader::get_data_set(std::vector<unsigned int>* input_col_indices, std::vector<unsigned int>* output_col_indices) { std::vector<std::pair<std::vector<double>, std::vector<double>>> data_set_contents; if(this->data->empty()) { THROW_LOGIC_ERROR("DataSet can not be created as there were no data read beforehand! Did you forget to call " "the method 'read()'?"); } for (auto line : *this->data) { //TODO check empty values in data std::vector<double> input; for (auto ind : *input_col_indices) { std::string s; try { /* Remove remaining spaces */ s = line.at(ind); boost::algorithm::erase_all(s, " "); /* Strip BOM */ // TODO solve in another way - work properly with different encodings! boost::algorithm::erase_all(s, "\uEFBBBF"); // UTF-8 boost::algorithm::erase_all(s, "\uFEFF"); // UTF-16 /* Check, if the string is a number */ auto tmp = boost::lexical_cast<double>(s); /* Add loaded number to the vector of inputs */ input.push_back(tmp); } catch (const std::out_of_range& e) { THROW_OUT_OF_RANGE_ERROR("Non-existing index specified (" + std::to_string(ind) + ")!"); } catch (const boost::bad_lexical_cast& e) { THROW_RUNTIME_ERROR( "Value \"" + s + "\" is not numerical and so it cannot be used in Data Set!"); } } std::vector<double> output; for (auto ind : *output_col_indices) { output.emplace_back(std::stod(line.at(ind))); } data_set_contents.emplace_back(std::make_pair(input, output)); } return std::make_shared<DataSet>(DataSet(&data_set_contents)); } }