Skip to content
Snippets Groups Projects
CSVReader.cpp 4.56 KiB
Newer Older
  • Learn to ignore specific revisions
  • #include <string>
    #include <fstream>
    #include <sstream>
    
    #include <boost/lexical_cast.hpp>
    #include <boost/algorithm/string/erase.hpp>
    
    #include "CSVReader.h"
    
    #include "exceptions.h"
    
    /**
     * Tells whether the file at the given path can be opened for reading.
     *
     * @param file_path Path of the file to probe
     * @return `true` when an input stream opened on `file_path` reports a good
     *         state, `false` otherwise
     */
    bool is_file_accessible(std::string file_path)
    {
        /* The stream is closed again as soon as `probe` goes out of scope */
        std::ifstream probe(file_path);
        const bool accessible = probe.good();
        return accessible;
    }
    
    namespace lib4neuro {
    
    Martin Beseda's avatar
    Martin Beseda committed
        /**
         * Creates a reader bound to a single CSV file.
         *
         * The file is only probed for accessibility here; its content is not
         * loaded until `read()` is called.
         *
         * @param file_path Path to the CSV file
         * @param delimiter String separating the individual values on a line
         * @param ignore_first_line When `true`, `read()` discards the first line
         *        of the file; the same flag is stored as `header_included`
         *
         * Raises an error through THROW_RUNTIME_ERROR when `file_path` cannot be
         * opened for reading.
         */
        CSVReader::CSVReader(std::string file_path,
                             std::string delimiter,
                             bool ignore_first_line) {

            /* Fail early - there is no point in keeping a reader for a missing file */
            if (!is_file_accessible(file_path)) {
                THROW_RUNTIME_ERROR("The file path \'" + file_path + "\' specified in CSVReader is not accessible!");
            }

            this->file_path = file_path;
            this->delimiter = delimiter;
            this->ignore_first_line = ignore_first_line;
            this->header_included = ignore_first_line;
        }
    
        /**
         * Loads the content of the file at `file_path` into `data`.
         *
         * Every non-empty line is split at each occurrence of `delimiter` and
         * appended to `data` as one vector of strings. When `ignore_first_line`
         * is set, the first line of the file is read and discarded.
         *
         * Rows are appended, so previously loaded rows are kept when the method
         * is called repeatedly.
         *
         * Notes on parsing:
         *  - a trailing carriage return (CRLF line endings) is removed before
         *    the line is examined,
         *  - delimiters of any length are supported,
         *  - with an empty delimiter the whole line is stored as a single value.
         */
        void CSVReader::read() {
            std::ifstream ifs(this->file_path);
            std::string line;

            /* Skip the first line, if requested */
            if (this->ignore_first_line) {
                std::getline(ifs,
                             line);
            }

            const size_t delimiter_length = this->delimiter.size();

            /* Read single line from the file */
            while (std::getline(ifs,
                                line)) {

                /* Drop the carriage return left behind by CRLF line endings,
                 * otherwise it would end up in the last value of the line */
                if (!line.empty() && line.back() == '\r') {
                    line.pop_back();
                }

                /* Ignore empty line */
                if (line.empty()) {
                    continue;
                }

                /* Separate elements of the line according to the delimiter */
                size_t last = 0;
                std::vector<std::string> separated_line;

                /* An empty delimiter matches at every position, so splitting
                 * is only attempted for a non-empty one */
                if (delimiter_length > 0) {
                    size_t next = 0;
                    while ((next = line.find(this->delimiter,
                                             last)) != std::string::npos) {
                        separated_line.emplace_back(line.substr(last,
                                                                next - last));

                        /* Continue right behind the whole delimiter, not just
                         * behind its first character */
                        last = next + delimiter_length;
                    }
                }

                /* The rest of the line (or the whole line) is the last value */
                separated_line.emplace_back(line.substr(last));

                /* Store the elements from the line to the vector with data */
                this->data.emplace_back(separated_line);
            }
        }
    
        std::vector<std::vector<std::string>>* CSVReader::get_data() {
    
        }
    
        void CSVReader::print_data() {
    
    Martin Beseda's avatar
    Martin Beseda committed
            for (auto line : this->data) {
                for (auto e : line) {
    
                    std::cout << e << " ";
                }
                std::cout << std::endl;
            }
        }
    
    
    Martin Beseda's avatar
    Martin Beseda committed
        std::shared_ptr<DataSet> CSVReader::get_data_set(std::vector<unsigned int>* input_col_indices,
                                                         std::vector<unsigned int>* output_col_indices) {
    
    
            std::vector<std::pair<std::vector<double>, std::vector<double>>> data_set_contents;
    
    
    Martin Beseda's avatar
    Martin Beseda committed
            if (this->data.empty()) {
    
                THROW_LOGIC_ERROR("DataSet can not be created as there were no data read beforehand! Did you forget to call "
                                  "the method 'read()'?");
            }
    
    
            for (auto line : this->data) {
    
                //TODO check empty values in data
    
                std::vector<double> input;
    
                for (auto ind : *input_col_indices) {
    
                        /* Remove remaining spaces */
                        s = line.at(ind);
    
    Martin Beseda's avatar
    Martin Beseda committed
                        boost::algorithm::erase_all(s,
                                                    " ");
    
    
                        /* Strip BOM */
                        // TODO solve in another way - work properly with different encodings!
    
    Martin Beseda's avatar
    Martin Beseda committed
                        boost::algorithm::erase_all(s,
                                                    "\uEFBBBF");  // UTF-8
                        boost::algorithm::erase_all(s,
                                                    "\uFEFF");  // UTF-16
    
                        /* Check, if the string is a number */
    
    Martin Beseda's avatar
    Martin Beseda committed
                        auto tmp = boost::lexical_cast<double>(s);
    
    
                        /* Add loaded number to the vector of inputs */
    
                    } catch (const std::out_of_range& e) {
    
                        THROW_OUT_OF_RANGE_ERROR("Non-existing index specified (" + std::to_string(ind) + ")!");
    
    
                    } catch (const boost::bad_lexical_cast& e) {
    
                        THROW_RUNTIME_ERROR(
                                "Value \"" + s + "\" is not numerical and so it cannot be used in Data Set!");
    
                }
    
                std::vector<double> output;
    
                for (auto ind : *output_col_indices) {
    
                    output.emplace_back(std::stod(line.at(ind)));
                }
    
    
    Martin Beseda's avatar
    Martin Beseda committed
                data_set_contents.emplace_back(std::make_pair(input,
                                                              output));
    
    Martin Beseda's avatar
    Martin Beseda committed
            return std::make_shared<DataSet>(DataSet(&data_set_contents));