// CSVReader.cpp
//
// Created by martin on 14.11.18.
//

#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string/erase.hpp>
#include "CSVReader.h"
#include "../exceptions.h"
namespace lib4neuro {
    /**
     * Binds the reader to a CSV file. Nothing is read here; the content is
     * loaded by a later call to read().
     *
     * @param file_path Path to the CSV file; it has to exist when the reader is constructed
     * @param delimiter String separating the individual values on a line
     * @param ignore_first_line If true, read() skips the first line of the file;
     *                          the same flag is stored as `header_included`
     *
     * An error is raised through THROW_RUNTIME_ERROR when `file_path` does not exist.
     */
    CSVReader::CSVReader(std::string file_path, std::string delimiter, bool ignore_first_line) {
        /* Validate the path before any member is touched or any memory is allocated */
        if(!std::filesystem::exists(file_path)) {
            THROW_RUNTIME_ERROR("The specified file path in CSVReader does not exist!");
        }

        /* The parameters are taken by value, so they can be moved into the members */
        this->file_path = std::move(file_path);
        this->delimiter = std::move(delimiter);
        this->ignore_first_line = ignore_first_line;
        this->header_included = ignore_first_line;

        /* NOTE(review): raw owning allocation - confirm that it is released elsewhere (e.g. in the destructor) */
        this->data = new std::vector<std::vector<std::string>>;
    }

    /**
     * Reads the file at `file_path` line by line and appends one row of strings
     * to `data` for every non-empty line.
     *
     * Each line is split on `delimiter`, which may be longer than one character.
     * An empty delimiter leaves the whole line as a single field. When
     * `ignore_first_line` is set, the first line of the file is skipped.
     * Rows are appended, so calling read() again adds the content once more.
     */
    void CSVReader::read() {
        std::ifstream ifs(this->file_path);
        std::string line;

        /* Drop the first line when requested */
        if(this->ignore_first_line) {
            std::getline(ifs, line);
        }

        const size_t delimiter_length = this->delimiter.size();

        /* Read single line from the file */
        while(std::getline(ifs, line)) {
            /* Ignore empty line */
            if(line.empty()) {
                continue;
            }

            /* Separate elements of the line according to the delimiter */
            std::vector<std::string> separated_line;
            size_t last = 0;

            /* Searching for an empty delimiter matches at every position and would
             * push `last` past the end of the line, so the split is skipped then */
            if(delimiter_length > 0) {
                size_t next = 0;
                while((next = line.find(this->delimiter, last)) != std::string::npos) {
                    separated_line.emplace_back(line.substr(last, next - last));

                    /* Step over the whole delimiter, not just over its first character */
                    last = next + delimiter_length;
                }
            }

            /* Remainder after the last delimiter (or the whole line when none was found) */
            separated_line.emplace_back(line.substr(last));

            /* Store the elements from the line to the vector with data */
            this->data->emplace_back(std::move(separated_line));
        }

        /* The stream is closed by its destructor when it goes out of scope */
    }

    std::vector<std::vector<std::string>>* CSVReader::get_data() {
        return this->data;
    }

    /**
     * Writes the stored rows to standard output: the values of a row are each
     * followed by a single space and every row ends with a flushed newline.
     */
    void CSVReader::print_data() {
        /* Iterate by const reference - the rows and strings are only read, so copying them is wasted work */
        for(const auto& row : *this->data) {
            for(const auto& value : row) {
                std::cout << value << " ";
            }
            std::cout << std::endl;
        }
    }

    DataSet CSVReader::get_data_set(std::vector<unsigned int>* input_col_indices,
                                    std::vector<unsigned int>* output_col_indices) {

        std::vector<std::pair<std::vector<double>, std::vector<double>>> data_set_contents;

        for(auto line : *this->data) {
            //TODO check empty values in data
            std::vector<double> input;
            for(auto ind : *input_col_indices) {
                    /* Remove remaining spaces */
                    s = line.at(ind);
                    boost::algorithm::erase_all(s, " ");

                    /* Strip BOM */
                    // TODO solve in another way - work properly with different encodings!
                    boost::algorithm::erase_all(s, "\uEFBBBF");  // UTF-8
                    boost::algorithm::erase_all(s, "\uFEFF");  // UTF-16

                    /* Check, if the string is a number */
                    double tmp = boost::lexical_cast<double>(s);

                    /* Add loaded number to the vector of inputs */
                } catch(const std::out_of_range& e) {
                    THROW_OUT_OF_RANGE_ERROR("Non-existing index specified (" + std::to_string(ind) + ")!");

                } catch (const boost::bad_lexical_cast& e) {
                    THROW_RUNTIME_ERROR("Value \"" + s + "\" is not numerical and so it cannot be used in Data Set!");
            }

            std::vector<double> output;
            for(auto ind : *output_col_indices) {
                output.emplace_back(std::stod(line.at(ind)));
            }

            data_set_contents.emplace_back(std::make_pair(input, output));
        return DataSet(&data_set_contents);