-
Martin Rusek authored
change: Vtr template disabled -> vtr; change: added stdafx header into cpps for unit testing; change: reworked how cluster ground truth works; other: numerous minor changes
Martin Rusek authored change: Vtr template disabled -> vtr; change: added stdafx header into cpps for unit testing; change: reworked how cluster ground truth works; other: numerous minor changes
help.cpp 8.16 KiB
#pragma once
#include "stdafx.h"
#include "help.h"
#include "dataParser.h"
#include <bitset>
#include <cmath>
#include <limits>
#include <random>
using namespace std;
#undef min
// Removes, in place, every leading character of `s` that occurs in `delimiters`.
//
// s          - string to trim; modified in place.
// delimiters - set of characters to strip from the front of `s`.
//
// A string consisting only of delimiter characters becomes empty, so trimming
// never leaves a delimiter-only residue. With an empty `delimiters` set a
// non-empty string is left unchanged.
void help::trimLeft(string &s, string const &delimiters)
{
    // find_first_not_of() returns npos when no character survives; erase(0, npos)
    // then removes the whole string, which is the wanted result in that case.
    s.erase(0, s.find_first_not_of(delimiters));
}
// Removes, in place, every trailing character of `s` that occurs in `delimiters`.
//
// s          - string to trim; modified in place.
// delimiters - set of characters to strip from the back of `s`.
//
// A string consisting only of delimiter characters becomes empty. With an empty
// `delimiters` set a non-empty string is left unchanged.
void help::trimRight(string &s, string const &delimiters)
{
    const size_t endpos = s.find_last_not_of(delimiters);
    if (string::npos == endpos)
    {
        // No character survives (or the string is already empty).
        s.clear();
        return;
    }
    // Drop everything after the last non-delimiter character.
    s.erase(endpos + 1);
}
// Strips the characters listed in `delimiters` from both ends of `s`, in place.
// The front is trimmed first (trimLeft), then the back (trimRight).
void help::trim(string &s, string const &delimiters)
{
    help::trimLeft(s, delimiters);
    help::trimRight(s, delimiters);
}
// Splits `s` into tokens separated by any of the single-byte delimiters in `d`.
//
// s - text to split.
// d - NUL-terminated list of delimiter characters; a null pointer is treated
//     as an empty list.
//
// Returns the non-empty tokens in order of appearance. Runs of consecutive
// delimiters produce no empty tokens.
vector<string> help::split(string const& s, char const *d)
{
    vector<string> output;

    // One flag per possible byte value. Characters are converted to
    // unsigned char before indexing so that bytes >= 0x80 (negative when char
    // is signed) and the value 255 stay inside the table.
    bitset<256> delims;
    for (; d != nullptr && *d != '\0'; ++d)
    {
        delims.set(static_cast<unsigned char>(*d));
    }

    string::const_iterator tokenBegin = s.begin();
    bool inToken = false;
    for (string::const_iterator it = s.begin(), end = s.end(); it != end; ++it)
    {
        if (delims.test(static_cast<unsigned char>(*it)))
        {
            if (inToken)
            {
                // A delimiter closes the token that is currently open.
                output.emplace_back(tokenBegin, it);
                inToken = false;
            }
        }
        else if (!inToken)
        {
            tokenBegin = it;
            inToken = true;
        }
    }

    // The last token has no closing delimiter.
    if (inToken)
        output.emplace_back(tokenBegin, s.end());

    return output;
}
// Splits `str` on every occurrence of the whole string `delimiter`.
//
// str       - text to split.
// delimiter - separator, matched as a complete substring.
//
// Returns the non-empty pieces in order of appearance. An empty `delimiter`
// cannot advance the search, so in that case the whole of `str` is returned as
// a single token (or nothing when `str` is empty) instead of looping forever.
vector<string> help::split1(const string &str, const string &delimiter)
{
    vector<string> tokens;

    if (delimiter.empty())
    {
        if (!str.empty())
            tokens.push_back(str);
        return tokens;
    }

    size_t prev = 0;
    while (prev < str.length())
    {
        size_t pos = str.find(delimiter, prev);
        if (pos == string::npos)
            pos = str.length(); // last piece runs to the end of the string

        // Skip empty pieces produced by adjacent or leading delimiters.
        if (pos > prev)
            tokens.push_back(str.substr(prev, pos - prev));

        prev = pos + delimiter.length();
    }
    return tokens;
}
// Splits `s` into the maximal runs of characters that are not in `delimiters`.
// Empty tokens are never produced; tokens are returned in order of appearance.
vector<string> help::split2(string const &s, string const &delimiters)
{
    vector<string> tokens;
    string::size_type tokenEnd = 0;

    // Jump from the start of one token to the start of the next one.
    for (string::size_type tokenStart = s.find_first_not_of(delimiters, 0);
         tokenStart != string::npos;
         tokenStart = s.find_first_not_of(delimiters, tokenEnd))
    {
        tokenEnd = s.find_first_of(delimiters, tokenStart);

        // No further delimiter: the token extends to the end of the string.
        const string::size_type count =
            (tokenEnd == string::npos) ? string::npos : tokenEnd - tokenStart;
        tokens.emplace_back(s.substr(tokenStart, count));
    }
    return tokens;
}
// Converts a two-level container of strings to doubles, row by row, by
// delegating each row to the single-row convertToDouble overload.
// Any exception raised while converting a row propagates to the caller.
vtr2<double> help::convertToDouble(vtr2<string> const &strInput)
{
    vtr2<double> converted;
    for (auto const &row : strInput)
    {
        converted.push_back(convertToDouble(row));
    }
    return converted;
}
// Converts every string of `strInput` to a double with std::stod and returns
// the values in the same order. std::stod throws (std::invalid_argument /
// std::out_of_range) when a string cannot be converted; that exception is not
// caught here.
vector<double> help::convertToDouble(vector<string> const &strInput)
{
    vector<double> values;
    for (string const &text : strInput)
    {
        values.push_back(stod(text));
    }
    return values;
}
// Removes a leading Unicode byte-order mark from `s`, in place.
//
// Recognized marks: UTF-8, UTF-32 BE, UTF-32 LE, UTF-16 BE and UTF-16 LE.
// At most one mark is removed; a string without a mark is left unchanged.
void help::correctBomLine(string &s)
{
    struct Bom
    {
        const char *bytes;
        size_t length;
    };

    // The lengths are stored explicitly because the UTF-32 marks contain NUL
    // bytes, which would cut the pattern short if it were treated as a C string.
    // UTF-32 LE is listed before UTF-16 LE because its first two bytes are the
    // UTF-16 LE mark; checking the shorter one first would strip only half.
    static const Bom boms[] = {
        { "\xEF\xBB\xBF", 3 },     // UTF-8
        { "\x00\x00\xFE\xFF", 4 }, // UTF-32 BE
        { "\xFF\xFE\x00\x00", 4 }, // UTF-32 LE
        { "\xFE\xFF", 2 },         // UTF-16 BE
        { "\xFF\xFE", 2 },         // UTF-16 LE
    };

    for (const Bom &bom : boms)
    {
        // compare() clamps the substring to the string length, so a string
        // shorter than the mark simply does not match.
        if (s.compare(0, bom.length, bom.bytes, bom.length) == 0)
        {
            s.erase(0, bom.length); // Now get rid of the BOM.
            return;
        }
    }
}
// Stretches every sequence in `input` to the length of the longest sequence by
// inserting averaged points, in place.
//
// Each pass walks a sequence from the front and, after every original point,
// inserts the element-wise average of that point and the next one, until the
// missing number of points has been added; the rest of the sequence is then
// appended unchanged. Passes repeat until the target length is reached.
//
// Sequences with fewer than three points are left untouched: a pass can only
// insert at odd positions k < size - 1, so such a sequence would never grow.
void help::interpolate(vtr3<double> &input)
{
    // Length of the longest sequence (-1 when there are no sequences).
    int maxLen = -1;
    for (auto const &s : input)
    {
        if (static_cast<int>(s.size()) > maxLen)
            maxLen = static_cast<int>(s.size());
    }

    for (size_t i = 0; i < input.size(); i++)
    {
        // Guard against the endless loop (sizes 1 and 2) and the unsigned
        // underflow of size() - 1 (size 0) described above.
        if (input[i].size() < 3)
            continue;

        int diff = maxLen - static_cast<int>(input[i].size()); // points still missing
        while (diff > 0)
        {
            int c = 0; // index of the next original point to copy
            vtr2<double> row;
            for (size_t k = 0; k < input[i].size() - 1 && diff > 0; k++)
            {
                if (k % 2 == 1)
                {
                    // Odd output position: midpoint of the previously copied
                    // point and the one that follows it.
                    vector<double> el;
                    for (size_t j = 0; j < input[i][k].size(); j++)
                    {
                        double tmp = (input[i][c - 1][j] + input[i][c][j]) / 2.0;
                        el.push_back(tmp);
                    }
                    row.push_back(el);
                    diff--;
                }
                else
                {
                    // Even output position: copy the original point.
                    row.push_back(input[i][c]);
                    c++;
                }
            }
            // Append the points that were not visited in this pass.
            row.insert(row.end(), input[i].begin() + c, input[i].end());
            input[i] = row;
        }
    }
}
// Applies normalize() to every sequence of `input`, in place.
void help::normalizeMany(vtr3<double> &input)
{
    for (auto &sequence : input)
    {
        normalize(sequence);
    }
}
// Scales each dimension of a sequence by the absolute value of its mean,
// in place.
//
// input - sequence indexed as input[point][dimension]; the number of
//         dimensions is taken from the first point.
//
// An empty sequence is left untouched. A dimension whose mean is exactly zero
// is left unscaled, because dividing by it would turn the whole column into
// inf/NaN.
void help::normalize(vtr2<double> &input)
{
    if (input.empty())
        return;

    const size_t dims = input[0].size();
    for (size_t i = 0; i < dims; i++) //dims
    {
        double sum = 0;
        for (size_t j = 0; j < input.size(); j++) //length of sequence
        {
            sum += input[j][i];
        }

        const double mean = std::abs(sum / (double)input.size());
        if (mean == 0)
            continue;

        for (size_t j = 0; j < input.size(); j++) //length of sequence
        {
            input[j][i] = input[j][i] / mean;
        }
    }
}
// Divides every value stored in `input` by `max`, in place.
// `max` is used as given; it is not checked against zero.
void help::normalizeZeroOne(vtr3<double> &input, double max)
{
    for (auto &sequence : input)
    {
        for (auto &point : sequence)
        {
            for (auto &value : point)
            {
                value /= max;
            }
        }
    }
}
// Runs separateSequenceOne() on the first `size` sequences of `input` and
// concatenates the results in order.
//
// input - sequences to separate.
// size  - number of leading sequences to process; values below zero are
//         treated as zero and values above input.size() are clamped to it, so
//         no element outside `input` is ever read.
vtr3<double> help::separateSequence(vtr3<double> const &input, int size)
{
    vtr3<double> output;

    size_t count = size > 0 ? static_cast<size_t>(size) : 0;
    if (count > input.size())
        count = input.size();

    for (size_t i = 0; i < count; i++)
    {
        auto perDimension = separateSequenceOne(input[i]);
        output.insert(output.end(), perDimension.begin(), perDimension.end());
    }
    return output;
}
// Splits one multi-dimensional sequence into one single-dimensional sequence
// per dimension.
//
// input - sequence indexed as input[point][dimension]; the number of
//         dimensions is taken from the first point.
//
// Returns output[dimension][point] = { input[point][dimension] }, i.e. every
// point of a resulting sequence holds exactly one value. An empty input yields
// an empty result.
vtr3<double> help::separateSequenceOne(vtr2<double> const &input)
{
    vtr3<double> output;
    if (input.empty())
        return output; // no first point to read the dimension count from

    const size_t dims = input[0].size();
    output.reserve(dims);
    for (size_t i = 0; i < dims; i++)
    {
        vtr2<double> sequence;
        sequence.reserve(input.size());
        for (size_t j = 0; j < input.size(); j++)
        {
            sequence.push_back(vector<double>(1, input[j][i]));
        }
        output.push_back(sequence);
    }
    return output;
}
// Downsamples every sequence of `input` in place, keeping the points at
// indices skip-1, 2*skip-1, 3*skip-1, ...
//
// input - sequences to thin out.
// skip  - sampling step. A step of 1 keeps every point, so nothing is changed;
//         steps of 0 or below are invalid and are also treated as "keep
//         everything" rather than wrapping the start index and discarding all
//         points.
void help::reduce(vtr3<double> &input, int skip)
{
    if (skip <= 1)
        return;

    const size_t step = static_cast<size_t>(skip);
    for (auto &sequence : input)
    {
        vtr2<double> kept;
        kept.reserve(sequence.size() / step);
        for (size_t j = step - 1; j < sequence.size(); j += step)
        {
            kept.push_back(sequence[j]);
        }
        sequence.swap(kept);
    }
}
// Shortens every sequence by replacing each group of `ratio` consecutive
// points with their per-dimension average.
//
// input - sequences indexed as input[sequence][point][dimension].
// ratio - number of points merged into one; the last group of a sequence may
//         be shorter. Values below 1 are treated as 1 (every point kept),
//         since a step of zero would never advance through the sequence.
//
// Returns a new container with one reduced sequence per input sequence; the
// input is not modified. The dimension count of a group is taken from its
// first point.
vtr3<double> help::paa(vtr3<double> const &input, int ratio)
{
    const size_t window = ratio > 0 ? static_cast<size_t>(ratio) : 1;

    vtr3<double> output(input.size());
    for (size_t i = 0; i < input.size(); i++)
    {
        auto const &sequence = input[i];
        vtr2<double> s;
        for (size_t j = 0; j < sequence.size(); j += window) // sequence
        {
            // One past the last point of this group, clipped to the sequence end.
            const size_t end = j + window >= sequence.size() ? sequence.size() : j + window;

            vector<double> dim(sequence[j].size());
            for (size_t k = 0; k < sequence[j].size(); k++) //all dims
            {
                double sum = 0;
                for (size_t l = j; l < end; l++) //sum individual groups of dims
                {
                    sum += sequence[l][k];
                }
                dim[k] = sum / static_cast<double>(end - j);
            }
            s.push_back(dim);
        }
        output[i] = s;
    }
    return output;
}
// Smooths every sequence with a moving average over `width` consecutive points.
//
// input - sequences indexed as input[sequence][point][dimension].
// width - number of points in the averaging window.
//
// For each sequence the first width-1 points are copied unchanged, followed by
// one averaged point per full window, so the result has the same length as the
// input sequence. A sequence that cannot hold a single full window (fewer than
// `width` points), or any sequence when `width` is below 1, is copied
// unchanged. The dimension count is taken from the first point of the sequence
// being processed. The input is not modified.
vtr3<double> help::smooth(vtr3<double> const & input, int width)
{
    vtr3<double> output(input.size());
    for (size_t i = 0; i < input.size(); i++)
    {
        auto const &sequence = input[i];

        // Without this guard the copy loop below would read past the end of
        // a short sequence and the unsigned window count would wrap around.
        if (width < 1 || sequence.size() < static_cast<size_t>(width))
        {
            output[i] = sequence;
            continue;
        }

        const size_t window = static_cast<size_t>(width);
        const size_t dims = sequence[0].size();

        vtr2<double> s;
        s.reserve(sequence.size());

        // Leading points that have no complete window ending at them.
        for (size_t j = 0; j + 1 < window; j++)
        {
            s.push_back(sequence[j]);
        }

        for (size_t j = 0; j + window <= sequence.size(); j++) // sequence
        {
            vtr<double> sums(dims);
            for (size_t k = 0; k < window; k++) // points of the window
            {
                for (size_t l = 0; l < dims; l++) // all dims
                {
                    sums[l] += sequence[j + k][l] / width;
                }
            }
            s.push_back(sums);
        }
        output[i] = s;
    }
    return output;
}
// Returns the part of `path` that precedes its last '\\' or '/' separator
// (the separator itself is not included). When `path` contains no separator
// it is returned unchanged.
std::string help::stripFileNameFromPath(std::string path)
{
    const std::string::size_type lastSeparator = path.find_last_of("\\/");
    if (lastSeparator != std::string::npos)
    {
        path.erase(lastSeparator);
    }
    return path;
}
// Returns a uniformly distributed random real number between `min` and `max`.
//
// min, max - bounds of the range. The value is drawn from
//            std::uniform_real_distribution over [lower, upper); if the bounds
//            are passed in reverse order they are swapped, because the
//            distribution requires lower <= upper.
//
// The Mersenne Twister engine is created and seeded from std::random_device
// once per thread and then reused, instead of being re-seeded on every call.
double help::getRandom(int min, int max)
{
    static thread_local std::mt19937 gen((std::random_device())());

    const double lower = min < max ? min : max;
    const double upper = min < max ? max : min;

    std::uniform_real_distribution<double> dis(lower, upper);
    return dis(gen);
}
//static double searchMax(vtr2<double> const &A, int idxStart, int idxEnd)
//{
// double max = numeric_limits<double>::min();
//
// for (size_t i = 0; i < A.size(); i++)
// {
// double sum = 0;
// for (size_t j = 0; j < A[i].size(); j++)
// {
// sum += A[i][j];
// }
//
// if (minA > sum)
// minA = sum;
// }
//}
//
//
//static double searchMin(int idxStart, int idxEnd)
//template<class T>
//void help::mark(node<T>** const &m, size_t row, size_t col)
//{
// for (size_t i = 1; i < row; i++)
// {
// for (size_t j = 1; j < col; j++)
// {
// if (m[i][j].size > m[i + 1][j].size && m[i][j].size > m[i][j + 1].size) {
// if (m[i + 1][j].size == m[i][j + 1].size)
// m[i][j].size++;
// }
// }
// }
//}