// Implementations of the help:: string, parsing and sequence-processing utilities declared in dataParser.h.
#include "dataParser.h"
#include <bitset>
#include <cmath>
#include <limits>
using namespace std;
#undef min
// Removes, in place, every leading character of `s` that occurs in `delimiters`.
// A string that consists solely of delimiter characters becomes empty.
void help::trimLeft(string &s, string const &delimiters)
{
    // find_first_not_of() returns npos when the string holds nothing but
    // delimiters; erase(0, npos) then clears the whole string, whereas the
    // previous npos guard left such a string completely untrimmed.
    s.erase(0, s.find_first_not_of(delimiters));
}
// Removes, in place, every trailing character of `s` that occurs in `delimiters`.
// A string that consists solely of delimiter characters becomes empty.
void help::trimRight(string &s, string const &delimiters)
{
    const size_t lastKept = s.find_last_not_of(delimiters);
    // npos means "no character survives": erase from index 0. The previous
    // npos guard skipped the erase and left an all-delimiter string untouched.
    s.erase(string::npos == lastKept ? 0 : lastKept + 1);
}
// Strips the characters listed in `delimiters` from both ends of `s`, in place:
// first the leading run, then the trailing run.
void help::trim(string &s, string const &delimiters)
{
    help::trimLeft(s, delimiters);
    help::trimRight(s, delimiters);
}
// ---- tokenizing helpers ----
// Splits `s` into tokens separated by any of the single-byte delimiters listed
// in the NUL-terminated string `d`. Runs of consecutive delimiters are treated
// as one separator, so no empty tokens are produced.
// A null `d` is treated as "no delimiters": a non-empty `s` is returned as a
// single token.
vector<string> help::split(string const& s, char const *d)
{
    vector<string> output;

    // One flag per possible byte value. The table needs 256 entries: the
    // previous 255-entry table could not represent byte 0xFF.
    bitset<256> delims;
    if (d != nullptr)
    {
        for (; *d; ++d)
            delims.set(static_cast<unsigned char>(*d));
    }

    string::const_iterator tokenBegin = s.begin();
    bool inToken = false;
    for (string::const_iterator it = s.begin(), end = s.end(); it != end; ++it)
    {
        // Index through unsigned char: where plain `char` is signed, bytes
        // >= 0x80 would otherwise turn into huge out-of-range indices.
        const bool isDelimiter = delims[static_cast<unsigned char>(*it)];
        if (isDelimiter)
        {
            if (inToken)
            {
                output.emplace_back(tokenBegin, it);
                inToken = false;
            }
        }
        else if (!inToken)
        {
            tokenBegin = it;
            inToken = true;
        }
    }

    // Flush the token that runs up to the end of the string.
    if (inToken)
        output.emplace_back(tokenBegin, s.end());
    return output;
}
// Splits `str` on every occurrence of the whole string `delimiter`.
// Empty tokens (leading, trailing or between adjacent delimiters) are dropped.
// An empty `delimiter` cannot separate anything: a non-empty `str` is returned
// as a single token.
vector<string> help::split1(const string &str, const string &delimiter)
{
    vector<string> tokens;

    // find("") matches at every position without advancing, which previously
    // made the loop spin forever on a non-empty string.
    if (delimiter.empty())
    {
        if (!str.empty()) tokens.push_back(str);
        return tokens;
    }

    size_t prev = 0;
    while (prev < str.length())
    {
        size_t pos = str.find(delimiter, prev);
        if (pos == string::npos) pos = str.length();
        if (pos > prev) tokens.push_back(str.substr(prev, pos - prev));
        prev = pos + delimiter.length();
    }
    return tokens;
}
vector<string> help::split2(string const &s, string const &delimiters)
{
vtr<std::string> result;
std::string::size_type pos = 0;
while (std::string::npos != (pos = s.find_first_not_of(delimiters, pos))) {
auto pos2 = s.find_first_of(delimiters, pos);
result.emplace_back(s.substr(pos, std::string::npos == pos2 ? pos2 : pos2 - pos));
pos = pos2;
}
return result;
}
// Converts a table of numeric strings into a table of doubles, row by row,
// by delegating each row to the vector<string> overload of convertToDouble.
// Any exception raised while converting a row propagates to the caller.
vtr2<double> help::convertToDouble(vtr2<string> const &strInput)
{
    vtr2<double> dd;
    dd.reserve(strInput.size());
    for (size_t i = 0; i < strInput.size(); i++)
    {
        // Push the converted row directly; no named intermediate copy.
        dd.push_back(convertToDouble(strInput[i]));
    }
    return dd;
}
// Parses every string of `strInput` with stod() and returns the values in the
// same order. Exceptions thrown by stod() are not caught here.
vector<double> help::convertToDouble(vector<string> const &strInput)
{
    vector<double> numbers;
    for (string const &text : strInput)
        numbers.push_back(stod(text));
    return numbers;
}
// Strips a leading Unicode byte-order mark (UTF-8, UTF-16 BE/LE, UTF-32 BE/LE)
// from `s`.
//
// NOTE(review): `s` is received by value, so the stripped string never reaches
// the caller and the function has no observable effect as declared. Making it
// useful requires changing the declaration (e.g. to `string &s`) in the header
// together with this definition - confirm against the callers.
void help::correctBomLine(string s)
{
    struct Bom
    {
        const char *bytes;
        size_t length;
    };
    // The 4-byte UTF-32 marks are listed before the 2-byte UTF-16 marks because
    // the UTF-32 LE mark begins with the UTF-16 LE mark and would otherwise
    // never be recognised in full.
    static const Bom boms[] = {
        { "\xEF\xBB\xBF", 3 },     // UTF-8
        { "\x00\x00\xFE\xFF", 4 }, // UTF-32 BE
        { "\xFF\xFE\x00\x00", 4 }, // UTF-32 LE
        { "\xFE\xFF", 2 },         // UTF-16 BE
        { "\xFF\xFE", 2 },         // UTF-16 LE
    };

    for (const Bom &bom : boms)
    {
        // The explicit length matters: the marks contain NUL bytes, and the
        // compare() overload without a length stops at the first NUL.
        if (s.compare(0, bom.length, bom.bytes, bom.length) == 0)
        {
            s.erase(0, bom.length); // Now get rid of the BOM.
            break;
        }
    }
}
// Stretches every sequence of `input` to the length of the longest sequence by
// inserting midpoint frames (element-wise averages of two neighbouring frames).
//
// One pass over a sequence inserts a midpoint after every frame from the front,
// leaving the tail as it was, and stops as soon as the target length is reached.
// Passes repeat until the sequence is long enough.
//
// Short sequences get special treatment, because a regular pass inserts nothing
// for them and previously never terminated:
//  - an empty sequence is left empty (there is nothing to interpolate from),
//  - a one-frame sequence is padded with copies of that frame,
//  - a two-frame sequence first receives the midpoint of its two frames.
void help::interpolate(vtr3<double> &input)
{
    size_t maxLen = 0;
    for (auto const &seq : input)
    {
        if (seq.size() > maxLen)
            maxLen = seq.size();
    }

    // Element-wise average of two frames, limited to the dimensions both have.
    auto midpoint = [](vector<double> const &a, vector<double> const &b) -> vector<double>
    {
        const size_t dims = a.size() < b.size() ? a.size() : b.size();
        vector<double> mid(dims);
        for (size_t j = 0; j < dims; j++)
            mid[j] = (a[j] + b[j]) / 2.0;
        return mid;
    };

    for (auto &seq : input)
    {
        size_t missing = maxLen - seq.size();
        while (missing > 0)
        {
            if (seq.empty())
                break;

            if (seq.size() == 1)
            {
                // Copy first: push_back may reallocate the storage of seq[0].
                const vector<double> only = seq[0];
                seq.push_back(only);
                missing--;
                continue;
            }

            if (seq.size() == 2)
            {
                seq.insert(seq.begin() + 1, midpoint(seq[0], seq[1]));
                missing--;
                continue;
            }

            // Regular pass: even steps copy the next source frame, odd steps
            // insert the midpoint of the frame just copied and the next one.
            vtr2<double> stretched;
            size_t src = 0;
            for (size_t k = 0; k + 1 < seq.size() && missing > 0; k++)
            {
                if (k % 2 == 1)
                {
                    stretched.push_back(midpoint(seq[src - 1], seq[src]));
                    missing--;
                }
                else
                {
                    stretched.push_back(seq[src]);
                    src++;
                }
            }
            // Frames not reached by this pass are carried over unchanged.
            stretched.insert(stretched.end(), seq.begin() + src, seq.end());
            seq = stretched;
        }
    }
}
// Applies normalize() to each sequence of `input`, in order, modifying it in place.
void help::normalizeMany(vtr3<double> &input)
{
    for (auto &sequence : input)
        normalize(sequence);
}
// Scales each dimension (column) of the sequence `input` in place by dividing
// every value of that column by the absolute value of the column's mean.
// The number of dimensions is taken from the first frame.
// An empty sequence is left untouched, and a column whose mean is exactly zero
// is skipped, because dividing by it would only produce inf/NaN values.
void help::normalize(vtr2<double> &input)
{
    if (input.empty())
        return;

    const size_t dims = input[0].size();
    for (size_t i = 0; i < dims; i++) // dims
    {
        double mean = 0;
        for (size_t j = 0; j < input.size(); j++) // length of sequence
        {
            mean += input[j][i];
        }
        mean /= input.size();
        mean = std::abs(mean);
        if (mean == 0)
            continue;

        for (size_t j = 0; j < input.size(); j++) // length of sequence
        {
            input[j][i] = input[j][i] / mean;
        }
    }
}
// Divides every value stored in `input` by `max`, in place.
void help::normalizeZeroOne(vtr3<double> &input, double max)
{
    for (auto &sequence : input)
    {
        for (auto &frame : sequence)
        {
            for (auto &value : frame)
                value /= max;
        }
    }
}
// Runs separateSequenceOne() on the first `size` sequences of `input` and
// concatenates the results, in order, into one list.
// `size` is clamped to [0, input.size()], so negative or oversized values can
// no longer index past the end of `input`.
vtr3<double> help::separateSequence(vtr3<double> const &input, int size)
{
    vtr3<double> output;

    size_t count = size > 0 ? static_cast<size_t>(size) : 0;
    if (count > input.size())
        count = input.size();

    for (size_t i = 0; i < count; i++)
    {
        auto tmp = separateSequenceOne(input[i]);
        output.insert(output.end(), tmp.begin(), tmp.end());
    }
    return output;
}
// Splits one multi-dimensional sequence into one single-dimensional sequence
// per dimension: result[d][j] is a one-element frame holding input[j][d].
// The number of dimensions is taken from the first frame.
// An empty input yields an empty result.
vtr3<double> help::separateSequenceOne(vtr2<double> const &input)
{
    vtr3<double> output;
    if (input.empty())
        return output; // no first frame to read the dimension count from

    const size_t dims = input[0].size();
    for (size_t i = 0; i < dims; i++)
    {
        vtr2<double> sequence;
        sequence.reserve(input.size());
        for (size_t j = 0; j < input.size(); j++)
        {
            sequence.push_back(vector<double>(1, input[j][i]));
        }
        output.push_back(sequence);
    }
    return output;
}
// Thins out every sequence of `input` in place. Each character of `skip` is
// read as the number `ch - '0'` and triggers one pass that drops every n-th
// frame (counting from 1) of the sequence as it stands after the previous pass.
// A '0' character is ignored: it would otherwise cause a modulo by zero.
void help::reduce(vtr3<double> &input, string const &skip)
{
    for (size_t i = 0; i < input.size(); i++)
    {
        for (size_t j = 0; j < skip.size(); j++)
        {
            const int remove = skip[j] - '0';
            if (remove == 0)
                continue;

            vtr2<double> kept;
            int c = 1; // 1-based position of the current frame
            for (size_t k = 0; k < input[i].size(); k++) // length of sequence
            {
                if (c % remove != 0)
                    kept.push_back(input[i][k]);
                c++;
            }
            input[i] = kept;
        }
    }
}
// Shortens every sequence of `input` by replacing each group of `ratio`
// consecutive frames with their per-dimension average. The last group of a
// sequence may contain fewer than `ratio` frames.
// A `ratio` below 1 is treated as 1 (the output equals the input); the loop
// previously never advanced for a zero ratio.
vtr3<double> help::paa(vtr3<double> const &input, int ratio)
{
    const size_t step = ratio > 0 ? static_cast<size_t>(ratio) : 1;

    vtr3<double> output(input.size());
    for (size_t i = 0; i < input.size(); i++)
    {
        const size_t length = input[i].size();
        vtr2<double> s;
        for (size_t j = 0; j < length; j += step) // sequence
        {
            const size_t end = j + step >= length ? length : j + step;
            vector<double> dim(input[i][j].size());
            for (size_t k = 0; k < dim.size(); k++) // all dims
            {
                double sum = 0;
                for (size_t l = j; l < end; l++) // sum individual groups of dims
                {
                    sum += input[i][l][k];
                }
                dim[k] = sum / static_cast<double>(end - j);
            }
            s.push_back(dim);
        }
        output[i] = s;
    }
    return output;
}
// Smooths every sequence of `input` with a trailing moving average of `width`
// frames and returns the result; `input` itself is not modified.
// The first `width - 1` frames of a sequence are copied unchanged. Every later
// output frame is the per-dimension average of `width` consecutive input
// frames, ending at the frame's own position.
// A `width` below 1 is treated as 1, and a sequence shorter than the window is
// copied unchanged; both cases previously read out of range.
// The dimension count is taken from the first frame of each sequence.
vtr3<double> help::smooth(vtr3<double> const & input, int width)
{
    const size_t window = width > 0 ? static_cast<size_t>(width) : 1;

    vtr3<double> output(input.size());
    for (size_t i = 0; i < input.size(); i++)
    {
        auto const &seq = input[i];
        if (seq.size() < window)
        {
            output[i] = seq;
            continue;
        }

        vtr2<double> s;
        for (size_t j = 0; j + 1 < window; j++)
        {
            s.push_back(seq[j]);
        }

        const size_t dims = seq[0].size();
        for (size_t j = 0; j + window <= seq.size(); j++) // sequence
        {
            vtrD sums(dims);
            for (size_t k = 0; k < window; k++) // frames in the window
            {
                for (size_t l = 0; l < dims; l++) // all dims
                {
                    sums[l] += seq[j + k][l] / static_cast<double>(window);
                }
            }
            s.push_back(sums);
        }
        output[i] = s;
    }
    return output;
}
// Returns the part of `path` that precedes its last '/' or '\' separator.
// If `path` contains no separator at all, it is returned unchanged.
std::string help::stripFileNameFromPath(std::string path)
{
    const std::string::size_type cut = path.find_last_of("\\/");
    return path.substr(0, cut);
}
//template<class T>
//void help::mark(node<T>** const &m, size_t row, size_t col)
//{
// for (size_t i = 1; i < row; i++)
// {
// for (size_t j = 1; j < col; j++)
// {
// if (m[i][j].size > m[i + 1][j].size && m[i][j].size > m[i][j + 1].size) {
// if (m[i + 1][j].size == m[i][j + 1].size)
// m[i][j].size++;
// }
// }
// }
//}