// Path, string and time-series helper routines (help::).
#include "dataParser.h"
// Reports whether `path` names an existing file-system entry.
// The answer comes straight from std::experimental::filesystem::exists.
bool help::pathExists(std::string path)
{
    namespace fs = std::experimental::filesystem;
    return fs::exists(fs::path(path));
}
// Reports whether `path` refers to a directory, as decided by
// std::experimental::filesystem::is_directory.
bool help::isFolder(std::string path)
{
    namespace fs = std::experimental::filesystem;
    const fs::path target{ path };
    return fs::is_directory(target);
}
// Returns true for every path that is_directory() does not classify as a directory.
// No existence check is made here: anything is_directory() rejects counts as a file.
bool help::isFile(std::string path)
{
    namespace fs = std::experimental::filesystem;
    return !fs::is_directory(fs::path(path));
}
// Returns the part of `path` that precedes its last '\' or '/'.
// When the path holds no separator, find_last_of yields npos and substr(0, npos)
// hands back the whole string unchanged.
string help::stripFileNameFromPath(std::string path)
{
    const auto lastSeparator = path.find_last_of("\\/");
    return path.substr(0, lastSeparator);
}
// Removes, in place, every leading character of `s` that occurs in `delimiters`.
// A string consisting solely of delimiter characters becomes empty.
void help::trimLeft(string &s, string const &delimiters)
{
    // find_first_not_of returns npos when only delimiters are present;
    // erase(0, npos) then clears the whole string.
    s.erase(0, s.find_first_not_of(delimiters));
}
// Removes, in place, every trailing character of `s` that occurs in `delimiters`.
// A string consisting solely of delimiter characters becomes empty.
void help::trimRight(string &s, string const &delimiters)
{
    // find_last_not_of returns npos when only delimiters are present; npos + 1 wraps
    // to 0, so the erase then starts at the beginning and clears the string.
    s.erase(s.find_last_not_of(delimiters) + 1);
}
// Strips `delimiters` from both ends of `s` by applying trimLeft and then trimRight.
void help::trim(string &s, string const &delimiters)
{
    trimLeft(s, delimiters);
    trimRight(s, delimiters);
}
// Splits `s` into tokens separated by any of the characters listed in the
// NUL-terminated string `d`.
// Consecutive delimiters are collapsed, so the result never contains empty tokens.
vector<string> help::split(string const& s, char const *d)
{
    // One flag per possible byte value (0..255). Characters are converted to
    // unsigned char before indexing so that bytes >= 0x80 stay in range even where
    // plain char is signed.
    std::bitset<256> isDelimiter;
    for (; *d != '\0'; ++d)
        isDelimiter.set(static_cast<unsigned char>(*d));

    std::vector<std::string> tokens;
    size_t tokenStart = 0;
    bool inToken = false;
    for (size_t pos = 0; pos < s.size(); ++pos)
    {
        if (isDelimiter.test(static_cast<unsigned char>(s[pos])))
        {
            if (inToken)
            {
                tokens.emplace_back(s, tokenStart, pos - tokenStart);
                inToken = false;
            }
        }
        else if (!inToken)
        {
            tokenStart = pos;
            inToken = true;
        }
    }

    // A token that runs up to the end of the string has no closing delimiter.
    if (inToken)
        tokens.emplace_back(s, tokenStart, std::string::npos);

    return tokens;
}
// Strips a leading Unicode byte-order mark from `s`, if one is present.
// Recognised marks: UTF-8 (EF BB BF), UTF-32 BE (00 00 FE FF), UTF-32 LE (FF FE 00 00),
// UTF-16 BE (FE FF) and UTF-16 LE (FF FE). At most one mark is removed.
void help::correctBomLine(string &s)
{
    // Every mark carries an explicit length: the UTF-32 marks contain NUL bytes and
    // would be cut short if they were compared as ordinary C strings.
    struct Bom
    {
        const char *bytes;
        size_t length;
    };
    static const Bom marks[] = {
        { "\xEF\xBB\xBF", 3 },      // UTF-8
        { "\x00\x00\xFE\xFF", 4 },  // UTF-32 BE
        { "\xFF\xFE\x00\x00", 4 },  // UTF-32 LE - tested before UTF-16 LE, which is its prefix
        { "\xFE\xFF", 2 },          // UTF-16 BE
        { "\xFF\xFE", 2 },          // UTF-16 LE
    };

    for (const Bom &mark : marks)
    {
        if (s.compare(0, mark.length, mark.bytes, mark.length) == 0)
        {
            s.erase(0, mark.length);
            return;
        }
    }
}
// ---- maxima and random data generation ----
// Returns the largest value stored anywhere in `series`.
// The running maximum starts at double_min (defined outside this block), which is
// therefore also the result when `series` is empty or no element exceeds it.
// NOTE(review): if double_min is the smallest positive double rather than the lowest
// representable one, all-negative data would be reported wrongly - confirm.
double help::findMax(vtr2<double> const &series)
{
    double best = double_min;
    for (auto const &row : series) // bound by reference so rows are not copied
    {
        for (auto const &value : row)
        {
            if (value > best)
                best = value;
        }
    }
    return best;
}
// Draws a pseudo-random double from a std::uniform_real_distribution built over
// `min` and `max`. The distribution requires min <= max.
double help::random_real(int min, int max)
{
    // One engine per thread, seeded once from random_device. Constructing and seeding
    // a fresh engine on every call is costly and gains nothing.
    thread_local std::mt19937 engine{ std::random_device{}() };
    std::uniform_real_distribution<double> dist(min, max);
    return dist(engine);
}
// Draws a pseudo-random int from a std::uniform_int_distribution built over
// `min` and `max` (both ends included). The distribution requires min <= max.
int help::random_int(int min, int max)
{
    // One engine per thread, seeded once from random_device. Constructing and seeding
    // a fresh engine on every call is costly and gains nothing.
    thread_local std::mt19937 engine{ std::random_device{}() };
    std::uniform_int_distribution<int> dist(min, max);
    return dist(engine);
}
// Builds a series of `size` points, each holding `dims` values obtained from
// random_real(min, max).
vtr2<double> help::random_timeSeries(int size, int dims, int min, int max)
{
    vtr2<double> ts(size);
    for (int i = 0; i < size; i++)
    {
        vtr<double> point(dims);
        for (int j = 0; j < dims; j++)
            point[j] = help::random_real(min, max);

        // Stored once, after the point is complete, instead of after every component.
        ts[i] = point;
    }
    return ts;
}
// Lengthens the sequences in `input` towards the length of the longest one.
// Each pass copies points from the front of a sequence and, at every odd step,
// inserts the component-wise average of the two neighbouring points until the
// required number of points has been added; the untouched tail is appended as is.
// The walk stops one step before the end of the sequence, so a pass over fewer than
// three points cannot insert anything - such sequences are left as they are.
void help::interpolate2(vtr3<double> &input)
{
    int maxLen = -1;
    for (auto const &s : input)
    {
        if (static_cast<int>(s.size()) > maxLen)
            maxLen = static_cast<int>(s.size());
    }

    for (auto &sequence : input)
    {
        int diff = maxLen - static_cast<int>(sequence.size());
        while (diff > 0)
        {
            size_t c = 0; // index of the next original point to copy
            vtr2<double> row;
            // `k + 1 < size` instead of `k < size - 1`: no wrap-around when empty.
            for (size_t k = 0; k + 1 < sequence.size() && diff > 0; k++)
            {
                if (k % 2 == 1)
                {
                    // midpoint between the last copied point and the next one
                    std::vector<double> el;
                    for (size_t j = 0; j < sequence[k].size(); j++)
                        el.push_back((sequence[c - 1][j] + sequence[c][j]) / 2.0);
                    row.push_back(el);
                    diff--;
                }
                else
                {
                    row.push_back(sequence[c]);
                    c++;
                }
            }
            row.insert(row.end(), sequence.begin() + c, sequence.end());

            // A pass that added nothing will never add anything: stop instead of
            // looping forever.
            const bool grew = row.size() > sequence.size();
            sequence = row;
            if (!grew)
                break;
        }
    }
}
// ---- interpolation, normalisation and resampling ----
// Lengthens every sequence in `input` to the length of the longest one.
// - A one-point sequence is replaced by maxLen copies of that point.
// - Longer sequences grow by repeated passes: each pass copies points from the front
//   and, at every odd step, inserts the component-wise average of the two neighbouring
//   points until the required number of points has been added; the untouched tail is
//   appended as is.
// - Empty sequences are left empty.
void help::interpolate(vtr3<double> &input)
{
    int maxLen = -1;
    for (auto const &s : input)
    {
        if (static_cast<int>(s.size()) > maxLen)
            maxLen = static_cast<int>(s.size());
    }

    for (auto &sequence : input)
    {
        // Nothing to interpolate from: without this guard the growth loop below could
        // never add a point and would not terminate.
        if (sequence.empty())
            continue;

        if (sequence.size() == 1)
        {
            vtr2<double> row(maxLen);
            fill(row.begin(), row.end(), sequence[0]);
            sequence = row;
        }

        int diff = maxLen - static_cast<int>(sequence.size());
        while (diff > 0)
        {
            vtr2<double> row;
            size_t c = 0; // index of the next original point to copy
            for (size_t j = 0; j < sequence.size() && diff > 0; j++)
            {
                if (j % 2 == 1)
                {
                    // midpoint between the last copied point and the next one
                    std::vector<double> point;
                    for (size_t k = 0; k < sequence[j].size(); k++)
                        point.push_back((sequence[c - 1][k] + sequence[c][k]) / 2.0);
                    row.push_back(point);
                    diff--;
                }
                else
                {
                    row.push_back(sequence[c]);
                    c++;
                }
            }
            row.insert(row.end(), sequence.begin() + c, sequence.end());
            sequence = row;
        }
    }
}
// Applies the single-argument normalize() to every element of `input`, in place.
void help::normalizeMany(vtr3<double> &input)
{
    for (auto &sequence : input)
        normalize(sequence);
}
// NOTE(review): this definition looks truncated - the braces around the function body
// and the outer loop are absent and `mean` is used without a visible declaration.
// Restore the missing lines from version control before relying on it.
// From what is visible: for each index i of input[0], the values input[j][i] are
// accumulated into `mean`, and every input[j][i] is then divided by `mean`.
// Whether `mean` is reset per index or divided by the element count cannot be seen here.
void help::normalize(vtr2<double> &input)
for (size_t i = 0; i < input[0].size(); i++) //dims
for (size_t j = 0; j < input.size(); j++) //length of sequence
{
mean += input[j][i];
}
for (size_t j = 0; j < input.size(); j++) //length of sequence
{
input[j][i] = input[j][i] / mean;
}
}
// Divides every value held in `input` by `max`, in place.
// `max` is used as given; it is not checked against zero.
void help::normalizeZeroOne(vtr3<double> &input, double max)
{
    for (auto &sequence : input)
        for (auto &point : sequence)
            for (auto &value : point)
                value /= max;
}
// Returns a copy of `input` in which every value has been multiplied by `coef`.
// The result has the same outer and inner sizes as `input`; `input` is not modified.
vtr2<double> help::normalize(vtr2<double> const &input, double coef)
{
    vtr2<double> scaled(input.size());
    size_t rowIndex = 0;
    for (auto const &row : input)
    {
        vtr<double> scaledRow(row.size());
        size_t column = 0;
        for (auto const &value : row)
            scaledRow[column++] = value * coef;
        scaled[rowIndex++] = scaledRow;
    }
    return scaled;
}
// NOTE(review): this definition looks truncated - `output` and `i` are used without a
// visible declaration, there is no loop around the two statements and no return.
// Restore the missing lines from version control before relying on it.
// From what is visible: the result of separateSequenceOne(input[i]) is appended to
// `output`. How the `size` parameter is used cannot be seen here.
vtr3<double> help::separateSequence(vtr3<double> const &input, int size)
{
auto tmp = separateSequenceOne(input[i]);
output.insert(output.end(), tmp.begin(), tmp.end());
}
// Splits `input` into one sequence per component: output[i][j] is a one-element point
// holding input[j][i].
// The number of components is taken from the first point; an empty `input` yields an
// empty result.
vtr3<double> help::separateSequenceOne(vtr2<double> const &input)
{
    vtr3<double> output;
    if (input.empty())
        return output; // input[0] below would be out of range

    const size_t dims = input[0].size();
    for (size_t i = 0; i < dims; i++)
    {
        vtr2<double> sequence;
        sequence.reserve(input.size());
        for (size_t j = 0; j < input.size(); j++)
        {
            std::vector<double> el(1);
            el[0] = input[j][i];
            sequence.push_back(el);
        }
        output.push_back(sequence);
    }
    return output;
}
// Thins every sequence in place, keeping only the points at indices
// skip-1, 2*skip-1, 3*skip-1, ...
// A skip of 0 is treated as "no reduction" and leaves `input` untouched.
void help::reduce(vtr3<double> &input, size_t skip)
{
    // With skip == 0 the start index skip - 1 would wrap around and every sequence
    // would end up empty.
    if (skip == 0)
        return;

    for (auto &sequence : input)
    {
        vtr2<double> kept;
        for (size_t j = skip - 1; j < sequence.size(); j += skip)
            kept.push_back(sequence[j]);
        sequence = kept;
    }
}
// Averages every sequence in place over consecutive groups of `ratio` points (the last
// group may be shorter): each group is replaced by a single point holding the
// per-component mean of the group. The component count of a group is taken from its
// first point.
// A ratio of 0 leaves `input` unchanged.
void help::paa(vtr3<double> &input, size_t ratio)
{
    // With ratio == 0 the group start would never advance.
    if (ratio == 0)
        return;

    for (auto &sequence : input)
    {
        vtr2<double> averaged;
        for (size_t start = 0; start < sequence.size(); start += ratio)
        {
            // Number of points in this group, clamped at the end of the sequence.
            const size_t remaining = sequence.size() - start;
            const size_t count = remaining < ratio ? remaining : ratio;

            std::vector<double> mean(sequence[start].size());
            for (size_t k = 0; k < mean.size(); k++)
            {
                double sum = 0;
                for (size_t l = start; l < start + count; l++)
                    sum += sequence[l][k];
                mean[k] = sum / count;
            }
            averaged.push_back(mean);
        }
        sequence = averaged;
    }
}
// Quantises every value in `input`, in place, into `numClasses` equal-width bins that
// span the smallest to the largest value found in `input`. Each value is replaced by
// the midpoint of its bin; the largest value falls into the last bin.
// Nothing is changed when numClasses is 0, when `input` holds no values, or when all
// values are equal (bin width 0).
void help::sax(vtr3<double> &input, size_t numClasses)
{
    if (numClasses == 0)
        return;

    // Value range over all sequences, points and components.
    bool seen = false;
    double lowest = 0;
    double highest = 0;
    for (auto const &sequence : input)
    {
        for (auto const &point : sequence)
        {
            for (auto const &value : point)
            {
                if (!seen)
                {
                    lowest = highest = value;
                    seen = true;
                }
                else if (value < lowest)
                    lowest = value;
                else if (value > highest)
                    highest = value;
            }
        }
    }
    if (!seen)
        return;

    const double step = (highest - lowest) / numClasses;
    if (!(step > 0))
        return;

    for (auto &sequence : input)
    {
        for (auto &point : sequence)
        {
            for (auto &value : point)
            {
                // Index of the bin [lowest + bin * step, lowest + (bin + 1) * step).
                size_t bin = static_cast<size_t>((value - lowest) / step);
                if (bin >= numClasses)
                    bin = numClasses - 1; // the maximum itself belongs to the last bin
                value = lowest + bin * step + step / 2;
            }
        }
    }
}
// ---- upsampling, smoothing and raw-array conversion ----
// Upsamples every sequence in place, `times` rounds in a row. One round inserts,
// between each pair of neighbouring points, their component-wise average, turning
// n points into 2n - 1.
// The component count is taken from the first point of the sequence being processed.
// Empty sequences are left empty.
void help::prolong(vtr3<double> &input, size_t times)
{
    for (auto &sequence : input)
    {
        // 2 * size - 1 would wrap around for an empty sequence.
        if (sequence.empty())
            continue;

        const size_t dims = sequence[0].size();
        for (size_t round = 0; round < times; round++)
        {
            vtr2<double> row(2 * sequence.size() - 1);
            for (size_t p = 0; p < sequence.size(); p++)
            {
                if (p > 0)
                {
                    // midpoint between the previous point and this one
                    vtr<double> mid(dims);
                    for (size_t l = 0; l < dims; l++)
                        mid[l] = (sequence[p - 1][l] + sequence[p][l]) / 2;
                    row[2 * p - 1] = mid;
                }

                vtr<double> point(dims);
                for (size_t l = 0; l < dims; l++)
                    point[l] = sequence[p][l];
                row[2 * p] = point;
            }
            sequence = row;
        }
    }
}
// NOTE(review): this definition looks heavily truncated - the function's opening brace,
// the loop header(s) that introduce `j` for the first push_back and `l` for the
// accumulation, and the declaration of `sums` are all absent, and the braces do not
// balance. Restore the missing lines from version control before relying on it.
// From what is visible: for each sequence input[i], some points input[i][j] are copied
// unchanged into `s`; then, for each window start j, `sums[l]` accumulates
// input[i][j + k][l] / width over k in [0, width) and `sums` is appended to `s`;
// finally `s` is stored in output[i].
// Note that `input[i].size() - width + 1` is unsigned arithmetic and would wrap when
// width exceeds size + 1.
void help::smooth(vtr3<double> &input, size_t width)
vtr3<double> output(input.size());
for (size_t i = 0; i < input.size(); i++)
{
const int dims = (int)input[0][0].size();
vtr2<double> s;
{
s.push_back(input[i][j]);
}
for (size_t j = 0; j < input[i].size() - width + 1; j++)// sequence
for (size_t k = 0; k < width; k++) //all dims
{
sums[l] += input[i][j + k][l] / width;
}
}
s.push_back(sums);
}
output[i] = s;
}
// Converts the first `len` values of the raw array `series` into a series of
// one-component points: out[i] holds the single value series[i].
// `series` must point to at least `len` readable doubles.
vtr2<double> help::convert_arrd(double* const &series, size_t len)
{
    vtr2<double> out(len);
    for (size_t i = 0; i < len; i++)
    {
        vtr<double> point(1);
        point[0] = series[i];
        out[i] = point;
    }
    return out;
}
// Converts a flat raw array into a series of `len` points with `dims` components each:
// out[i] holds series[i * dims] .. series[(i + 1) * dims - 1].
// `series` must point to at least len * dims readable doubles.
vtr2<double> help::convert_arr2d(double* const &series, size_t len, size_t dims)
{
    vtr2<double> out(len);
    for (size_t i = 0; i < len; i++)
    {
        double *first = &series[0] + (i * dims);
        out[i] = vtr<double>(first, first + dims);
    }
    return out;
}
// NOTE(review): this definition looks truncated - the function braces, the loop body
// and the return statement are absent; only commented-out statements follow the loop
// header. Restore the missing lines from version control before relying on it.
// From what is visible: a result `out` with `len` entries is created and a loop over
// i in [0, len) is started.
vtr2<double> help::convert_arr3d(double* const &input, size_t len, size_t dims)
vtr2<double> out(len);
for (size_t i = 0; i < len; i++)
//int size = (sizeof(input[i]) / sizeof(double)) / dims;
//auto tseries = convert_arr2d(input[i], size);
//static double searchMax(vtr2<double> const &A, int idxStart, int idxEnd)
//{
// double max = numeric_limits<double>::min();
//
// for (size_t i = 0; i < A.size(); i++)
// {
// double sum = 0;
// for (size_t j = 0; j < A[i].size(); j++)
// {
// sum += A[i][j];
// }
//
// if (minA > sum)
// minA = sum;
// }
//}
//
//
//static double searchMin(int idxStart, int idxEnd)
//vector<string> help::split1(const string &str, const string &delimiter)
// vector<string> tokens;
// size_t prev = 0, pos = 0;
// do
// {
// pos = str.find(delimiter, prev);
// if (pos == string::npos) pos = str.length();
// string token = str.substr(prev, pos - prev);
// if (!token.empty()) tokens.push_back(token);
// prev = pos + delimiter.length();
// } while (pos < str.length() && prev < str.length());
//
// return tokens;
//vector<string> help::split2(string const &s, string const &delimiters)
//{
// vtr<std::string> result;
// std::string::size_type pos = 0;
// while (std::string::npos != (pos = s.find_first_not_of(delimiters, pos))) {
// auto pos2 = s.find_first_of(delimiters, pos);
// result.emplace_back(s.substr(pos, std::string::npos == pos2 ? pos2 : pos2 - pos));
// pos = pos2;
// }
//
// return result;
//}
//vtr2<double> help::convertToDouble(vtr2<string> const &strInput)
//{
// vtr2<double> dd;
//
// for (size_t i = 0; i < strInput.size(); i++)
// {
// vector<double> d = convertToDouble(strInput[i]);
// dd.push_back(d);
// }
//
// return dd;
//}
//
//vector<double> help::convertToDouble(vector<string> const &strInput)
//{
// vector<double> d;
//
// for (size_t i = 0; i < strInput.size(); i++)
// {
// d.push_back(stod(strInput[i]));
// }
//
// return d;
//}