diff --git a/SequenceComparison/calcul.cpp b/SequenceComparison/calcul.cpp index 3d1d8b256ed10f9e6c1d974d00d8657aa882a45c..b8cb4f3ac5a9f09dc478bed1ff26fb2c3c4686b3 100644 --- a/SequenceComparison/calcul.cpp +++ b/SequenceComparison/calcul.cpp @@ -39,7 +39,7 @@ double calcul::distance_dtw(vtr<double> const &u, vector<double> const &v) for (size_t i = 0; i < u.size(); i++) { sum += pow(u[i] - v[i], 2); - //sum += abs(u[i] - v[i]); + //sum += abs(u[i] - v[i]); //su = u[i] - v[i]; //sum += (su + (su >> 31)) ^ (su >> 31); //su = u[i] - v[i]; @@ -62,83 +62,110 @@ double calcul::distance_lcss(vector<double> const &u, vector<double> const &v, i bool calcul::isInWindow(int row, int col, float ratio, int percent) { - if (ratio < 1) - { - float r = 1 / ratio; - float tmp = ceil(col * r - row); - if (tmp >= -(percent - 1) * r && tmp < (r * percent)) - return true; - } - else - { - float tmp = ceil(row * ratio - col); - if (tmp >= -(percent - 1) * ratio && tmp < (ratio * percent)) - return true; - } - return false; + if (ratio < 1) + { + float r = 1 / ratio; + float tmp = ceil(col * r - row); + if (tmp >= -(percent - 1) * r && tmp < (r * percent)) + return true; + } + else + { + float tmp = ceil(row * ratio - col); + if (tmp >= -(percent - 1) * ratio && tmp < (ratio * percent)) + return true; + } + return false; } double calcul::scorePair_dtw_s1(double ratioRaw, size_t pathLength) { - return sqrt(ratioRaw) / pathLength; + return sqrt(ratioRaw) / pathLength; } double calcul::scorePair_dtw_s2(size_t lenIn, size_t lenOut, size_t lenPath) { - return 1 - (lenIn / static_cast<double>(lenPath) + lenOut / static_cast<double>(lenPath)) / 2; + return 1 - (lenIn / static_cast<double>(lenPath) + lenOut / static_cast<double>(lenPath)) / 2; } double calcul::scorePair_dtw_s3(double ratioRaw) { - return ratioRaw; + return ratioRaw; } double calcul::scorePair_dtw_s4(double ratioRaw, double ratioRawMax) { - return /*1 -*/ (sqrt(ratioRaw) / sqrt(ratioRawMax)); + return /*1 -*/ (sqrt(ratioRaw) / sqrt(ratioRawMax)); } double calcul::scorePair_dtw_s5(double ratioRaw, double ratioRawMax, double coeficient) { - return (/*1 -*/ sqrt(ratioRaw) / sqrt(ratioRawMax)) * coeficient; + return (/*1 -*/ sqrt(ratioRaw) / sqrt(ratioRawMax)) * coeficient; } double calcul::scorePair_dtw_max(vtr2<double> const &A, vtr2<double> const &B) { - double minA = numeric_limits<double>::max(); - double maxB = numeric_limits<double>::min(); - - for (size_t i = 0; i < A.size(); i++) - { - if (minA > A[i][0]) - minA = A[i][0]; - } - - for (size_t i = 0; i < B.size(); i++) - { - if (maxB < B[i][0]) - maxB = B[i][0]; - } - - return pow(maxB - minA, 2) * std::max(A.size(), B.size()); + double minA = numeric_limits<double>::max(); + double maxB = numeric_limits<double>::min(); + + for (size_t i = 0; i < A.size(); i++) + { + double sum = 0; + for (size_t j = 0; j < A[i].size(); j++) + { + sum += A[i][j]; + } + + if (minA > sum) + minA = sum; + } + + for (size_t i = 0; i < B.size(); i++) + { + double sum = 0; + for (size_t j = 0; j < B[i].size(); j++) + { + sum += B[i][j]; + } + + if (maxB < sum) + maxB = sum; + } + + return pow(maxB - minA, 2) * std::max(A.size(), B.size()); } -double calcul::getPairRatio_lcss(int lenIN, int rawscore) +double calcul::scorePair_lcss_s1(double ratioRaw, size_t pathLength) { - return rawscore / (lenIN / 2.0); + return 1 - ratioRaw / static_cast<double>(pathLength); } +double calcul::scorePair_lcss_s2(double ratioRaw, size_t maxABLen) +{ + return 1 - ratioRaw / maxABLen; +} + +double calcul::scorePair_lcss_s3(double ratioRaw) +{ + return ratioRaw; +} + +//double calcul::getPairRatio_lcss(int lenIN, int rawscore) +//{ +// return rawscore / (lenIN / 2.0); +//} + double calcul::getMultiRatio_dtw(vtr3<double> const &input, vtr3<double> const &output) { - size_t sumA = 0; - for (auto s : input) - sumA += s.size(); + size_t sumA = 0; + for (auto s : input) + sumA += s.size(); - size_t sumB = 0; - for (auto s : output) - sumB += s.size(); + size_t sumB = 0; + for (auto s : output) + sumB += s.size(); - return sumA / static_cast<double>(sumB); + return sumA / static_cast<double>(sumB); } //double calcul::GetMRRratio(vtr2<int> const &orderMatrix, map<int, int> &clusters) @@ -164,26 +191,26 @@ double calcul::getMultiRatio_dtw(vtr3<double> const &input, vtr3<double> const & vtrD calcul::getMAPratio(vtr2<int> const &orderMatrix, map<int, int> const &clusters) { - vtrD ratios; - double mapRatio = 0; - for (size_t i = 0; i < orderMatrix.size(); i++) //query (ref sequence) //column - { - double pr = 0; - double coverIdx = 0; - for (size_t j = 1; j < orderMatrix.size(); j++) // row / rank - { - if (clusters.at(orderMatrix[j][i]) == clusters.at((int)i + 1)) - pr += ++coverIdx / j; - } - mapRatio += pr / coverIdx; - ratios.push_back(pr / coverIdx); - //cout << setw(5) << fixed << pr / coverIdx; - } - //cout << " | "; - - ratios.push_back(mapRatio / (orderMatrix.size())); - - return ratios; + vtrD ratios; + double mapRatio = 0; + for (size_t i = 0; i < orderMatrix.size(); i++) //query (ref sequence) //column + { + double pr = 0; + double coverIdx = 0; + for (size_t j = 1; j < orderMatrix.size(); j++) // row / rank + { + if (clusters.at(orderMatrix[j][i]) == clusters.at((int)i + 1)) + pr += ++coverIdx / j; + } + mapRatio += pr / coverIdx; + ratios.push_back(pr / coverIdx); + //cout << setw(5) << fixed << pr / coverIdx; + } + //cout << " | "; + + ratios.push_back(mapRatio / (orderMatrix.size())); + + return ratios; } // diff --git a/SequenceComparison/calcul.h b/SequenceComparison/calcul.h index 8e0abbb40da86799324f0efc6a00f768f91667d4..d7e7a1978463e3e18f40de4887d2091dade197e2 100644 --- a/SequenceComparison/calcul.h +++ b/SequenceComparison/calcul.h @@ -6,42 +6,46 @@ class calcul { public: - //Returns distance between 2 sequence elements. + //Returns distance between 2 sequence elements. static double distance_dtw(vtr<double> const &u, vtr<double> const &v); - - //Returns distance between 2 elements. - //Function used with lcss method for check if all dimensions are under epsilon condition. - static double distance_lcss(vtr<double> const &u, vtr<double> const &v, int idx); - //Returns distance between N elements of sequence. + + //Returns distance between 2 elements. + //Function used with lcss method for check if all dimensions are under epsilon condition. + static double distance_lcss(vtr<double> const &u, vtr<double> const &v, int idx); + //Returns distance between N elements of sequence. static double distancesMany_dtw(vtr2<double> const &points); - - //Retruns true if cell of distance matrix is in warping window. - static bool isInWindow(int row, int col, float ratio, int percent); - - //Score - //Returns similarity in <0, 1> interval. - //static double GetPairRatioDtw(size_t lenIn, size_t lenOut); - //Returns similarity in <0, 1> interval. - static double scorePair_dtw_s1(double ratioRaw, size_t lenPath); - static double scorePair_dtw_s2(size_t lenA, size_t lenB, size_t lenPath); - static double scorePair_dtw_s3(double ratioRaw); - static double scorePair_dtw_s4(double ratioRaw, double ratioRawMax); - static double scorePair_dtw_s5(double ratioRaw, double ratioRawMax, double coeficient); + + //Retruns true if cell of distance matrix is in warping window. + static bool isInWindow(int row, int col, float ratio, int percent); + + //Score + //Returns similarity in <0, 1> interval. + //static double GetPairRatioDtw(size_t lenIn, size_t lenOut); + //Returns similarity in <0, 1> interval. + static double scorePair_dtw_s1(double ratioRaw, size_t lenPath); + static double scorePair_dtw_s2(size_t lenA, size_t lenB, size_t lenPath); + static double scorePair_dtw_s3(double ratioRaw); + static double scorePair_dtw_s4(double ratioRaw, double ratioRawMax); + static double scorePair_dtw_s5(double ratioRaw, double ratioRawMax, double coeficient); - static double scorePair_dtw_max(vtr2<double> const &A, vtr2<double> const &B); + static double scorePair_dtw_max(vtr2<double> const &A, vtr2<double> const &B); - //Returns similarity in <0, 1> interval. - static double getPairRatio_lcss(int lenIN, int rawscore); - //Return similarity in <0, 1> interval. - static double getMultiRatio_dtw(vtr3<double> const &input, vtr3<double> const &output); - //Retruns Mean Average Precision. - static vtrD getMAPratio(vtr2<int> const & orderMatrix, std::map<int, int> const &clusters); - //stati`c double GetMRRratio(vtr2<double> simMatrix); - - //new - static double getLowerBoundKim(vtr2<double> const &, vtr2<double> const &); - //static double GetLowerBoundYi(vtr2<double> const &, vtr2<double> const &); - //static double GetLowerBoundKeogh(vtr2<double> const &, vtr2<double> const &); + static double scorePair_lcss_s1(double ratioRaw, size_t lenPath); + static double scorePair_lcss_s2(double ratioRaw, size_t maxABLen); + static double scorePair_lcss_s3(double ratioRaw); + + //Returns similarity in <0, 1> interval. + //static double getPairRatio_lcss(int lenIN, int rawscore); + //Return similarity in <0, 1> interval. + static double getMultiRatio_dtw(vtr3<double> const &input, vtr3<double> const &output); + //Retruns Mean Average Precision. + static vtrD getMAPratio(vtr2<int> const & orderMatrix, std::map<int, int> const &clusters); + //stati`c double GetMRRratio(vtr2<double> simMatrix); + + //new + static double getLowerBoundKim(vtr2<double> const &, vtr2<double> const &); + //static double GetLowerBoundYi(vtr2<double> const &, vtr2<double> const &); + //static double GetLowerBoundKeogh(vtr2<double> const &, vtr2<double> const &); }; diff --git a/SequenceComparison/dtw.cpp b/SequenceComparison/dtw.cpp index 264c7eee5c2de00ffbaff3c6744a626d96fd6272..f48e12005e6a28e76b488b77916e52e8e63e686b 100644 --- a/SequenceComparison/dtw.cpp +++ b/SequenceComparison/dtw.cpp @@ -25,319 +25,183 @@ double dtw::main(vtr2<double> const &A, vtr2<double> const &B, parameter const & double dtw::alignment(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { - warpPath backtrack; - if(params.dmDataType == "double") - backtrack = dtw::createMatrix2<double>(A, B, params); - else if(params.dmDataType == "int") - backtrack = dtw::createMatrix2<int>(A, B, params); - else if (params.dmDataType == "float") - backtrack = dtw::createMatrix2<float>(A, B, params); - else if (params.dmDataType == "char" || params.dmDataType == "byte") - backtrack = dtw::createMatrix2<char>(A, B, params); - - //help::mark(m, A.size(), B.size()); //k cemu je mark fce? lol lepsi mena pako - - double result = 0; - if (params.scoreType == 1) - result = calcul::scorePair_dtw_s1(backtrack.rawScore, backtrack.path.size()); - else if (params.scoreType == 2) - result = calcul::scorePair_dtw_s2(A.size(), B.size(), backtrack.path.size()); - else if (params.scoreType == 3) - result = backtrack.rawScore; - else if (params.scoreType == 4) - result = calcul::scorePair_dtw_s4(backtrack.rawScore, calcul::scorePair_dtw_max(A, B)); - else if (params.scoreType == 5) - result = calcul::scorePair_dtw_s5(backtrack.rawScore, calcul::scorePair_dtw_max(A, B), A.size() < B.size() ? A.size() / (double) B.size() : B.size() / (double)A.size()); - - if (params.isRatioReversed()) // -rr switch - return 1 - result; - else - return result; + warpPath backtrack; + if(params.dmDataType == "double") + backtrack = dtw::createMatrix<double>(A, B, params); + else if(params.dmDataType == "int") + backtrack = dtw::createMatrix<int>(A, B, params); + else if (params.dmDataType == "float") + backtrack = dtw::createMatrix<float>(A, B, params); + else if (params.dmDataType == "char" || params.dmDataType == "byte") + backtrack = dtw::createMatrix<char>(A, B, params); + + //help::mark(m, A.size(), B.size()); //k cemu je mark fce? lol lepsi mena pako + + double result = 0; + if (params.scoreType == 1) + result = calcul::scorePair_dtw_s1(backtrack.scoreRaw, backtrack.path.size()); + else if (params.scoreType == 2) + result = calcul::scorePair_dtw_s2(A.size(), B.size(), backtrack.path.size()); + else if (params.scoreType == 3) + result = backtrack.scoreRaw; + else if (params.scoreType == 4) + result = calcul::scorePair_dtw_s4(backtrack.scoreRaw, calcul::scorePair_dtw_max(A, B)); + else if (params.scoreType == 5) + result = calcul::scorePair_dtw_s5(backtrack.scoreRaw, calcul::scorePair_dtw_max(A, B), A.size() < B.size() ? A.size() / (double)B.size() : B.size() / (double)A.size()); + else + result = 0; + + if (params.isRatioReversed()) // -rr switch + return 1 - result; + else + return result; } template<class T> warpPath dtw::createMatrix(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) -{ - node<T>** m = new node<T>*[A.size() + 1]; - for (int i = 0; i < A.size() + 1; i++) - m[i] = new node<T>[B.size() + 1]; - - m[0][0].value = 0; - - int w = (int)(B.size() * params.w); - for (int i = 1; i < A.size() + 1; i++) //row - y - { - int start = max(1, (int)(ceil((i - 1) * (B.size() / (double)A.size() + 0.0000000001)) - w)); - int end = min((int)B.size() + 1, (int)(ceil(i * B.size() / (double)A.size()) + 1) + w); - for (int j = start; j < end; j++) //col - x - { - double u = m[i - 1][j].value; - double l = m[i][j - 1].value; - double d = m[i - 1][j - 1].value; - //double minim = min({ u, l, d }); - - //double minim = min({ m[i - 1][j].value, m[i][j - 1].value, m[i - 1][j - 1].value }); - double minim = 0; - if (l < u) - minim = l; - else - minim = u; - - if (minim > d) - minim = d; - - m[i][j].value = static_cast<T>(calcul::getDistance_dtw(A[i - 1], B[j - 1]) + minim); - - /* if(m[i - 1][j].value > m[i][j].value && m[i][j - 1].value > m[i][j].value && m[i - 1][j - 1].value > m[i][j].value) - minim++; - - if (m[i][j].value < minim) - minim++;*/ - } - } - - //stringstream ss; - - //ss << endl; - //ss << setprecision(4); - //for (size_t i = 0; i < 20; i++) - //{ - // for (size_t j = 0; j < 20; j++) - // { - // ss << setw(9) << m[i][j].value << " "; - // } - // ss << endl; - //} - // /*ss << setprecision(4); - // for (size_t i = A.size() - 20; i < A.size() + 1; i++) - // { - // for (size_t j = B.size() - 20; j < B.size() + 1; j++) - // { - // ss << m[i][j].value<< " "; - // } - // ss << endl; - // }*/ - // cout << ss.str() << endl; - - auto backtrack = backtrack(m, A.size(), B.size()); - backtrack.rawScore = m[A.size()][B.size()].value; - - for (int i = 0; i < A.size() + 1; i++) { - delete[] m[i]; - } - delete[] m; - - return backtrack; -} - -template<class T> -warpPath dtw::backtrack(node<T>** const &m, size_t i, size_t j) { - warpPath wp; - double ratio = j / static_cast<double>(i); - - while (i > 0 && j > 0) - { - wp.sumScore += m[i][j].value; - - double u = m[i - 1][j].value; - double l = m[i][j - 1].value; - double d = m[i - 1][j - 1].value; - if (min({ d, u, l }) == d /*&& y > 1 && x > 1*/) - { - wp.path = "M" + wp.path; - i--; - j--; - } - else - { - if(l < u) - { - wp.path = "L" + wp.path; - j--; - } - else if(u < l) - { - wp.path = "U" + wp.path; - i--; - } - else - { - if(i * ratio > 1) - { - wp.path = "U" + wp.path; - i--; - } - else - { - wp.path = "L" + wp.path; - j--; - } - } - } - } - - while (i > 0) - { - wp.path = "U" + wp.path; - i--; - } - - while (j > 0) - { - wp.path = "L" + wp.path; - j--; - } - - return wp; -} - -template<class T> -warpPath dtw::createMatrix2(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) -{ - Vtr<node<T>, 2> m(A.size() + 1); - for (int i = 0; i < A.size() + 1; i++) - m[i] = vector<node<T>>(B.size() + 1); - - for (int i = 0; i < min((int)A.size(), params.relaxation + 1); i++) - m[i][0].value = 0; - - for (int i = 0; i < min((int)B.size(), params.relaxation + 1); i++) - m[0][i].value = 0; - - const int w = (int)(B.size() * params.w); - for (int i = 1; i < A.size() + 1; i++) //row - y - { - const int start = max(1, (int)(ceil((i - 1) * (B.size() / (double)A.size() + 0.0000000001)) - w)); - const int end = min((int)B.size() + 1, (int)(ceil(i * B.size() / (double)A.size()) + 1) + w); - for (int j = start; j < end; j++) //col - x - { - const double u = m[i - 1][j].value; - const double l = m[i][j - 1].value; - const double d = m[i - 1][j - 1].value; - //double minim = min({ u, l, d }); - - //double minim = min({ m[i - 1][j].value, m[i][j - 1].value, m[i - 1][j - 1].value }); - double minim = 0; - if (l < u) - minim = l; - else - minim = u; - - if (minim > d) - minim = d; - - m[i][j].value = static_cast<T>(calcul::distance_dtw(A[i - 1], B[j - 1]) + minim); - - /* if(m[i - 1][j].value > m[i][j].value && m[i][j - 1].value > m[i][j].value && m[i - 1][j - 1].value > m[i][j].value) - minim++; - - if (m[i][j].value < minim) - minim++;*/ - } - } - auto end = findMinEnd(m, params.relaxation); - auto backtrack = backtrack2(m, end.i, end.j, params); - backtrack.rawScore = m[end.i][end.j].value; - - if (params.isPin()) - cout << endl << print::printPathShape(backtrack.path, end, (int)A.size() + 1, (int)B.size() + 1); - - return backtrack; + Vtr<node<T>, 2> m(A.size() + 1); + for (int i = 0; i < A.size() + 1; i++) + m[i] = vector<node<T>>(B.size() + 1); + + for (int i = 0; i < min((int)A.size(), params.relaxation + 1); i++) + m[i][0].value = 0; + + for (int i = 0; i < min((int)B.size(), params.relaxation + 1); i++) + m[0][i].value = 0; + + const int w = (int)(B.size() * params.w); + for (int i = 1; i < A.size() + 1; i++) //row - y + { + const int start = max(1, (int)(ceil((i - 1) * (B.size() / (double)A.size() + 0.0000000001)) - w)); + const int end = min((int)B.size() + 1, (int)(ceil(i * B.size() / (double)A.size()) + 1) + w); + for (int j = start; j < end; j++) //col - x + { + const double u = m[i - 1][j].value; + const double l = m[i][j - 1].value; + const double d = m[i - 1][j - 1].value; + //double minim = min({ u, l, d }); + + //double minim = min({ m[i - 1][j].value, m[i][j - 1].value, m[i - 1][j - 1].value }); + double minim = 0; + if (l < u) + minim = l; + else + minim = u; + + if (minim > d) + minim = d; + + m[i][j].value = static_cast<T>(calcul::distance_dtw(A[i - 1], B[j - 1]) + minim); + + /* if(m[i - 1][j].value > m[i][j].value && m[i][j - 1].value > m[i][j].value && m[i - 1][j - 1].value > m[i][j].value) + minim++; + + if (m[i][j].value < minim) + minim++;*/ + } + } + + auto end = findRelaxedEnd(m, params.relaxation); + auto back = backtrack(m, end.i, end.j, params); + back.scoreRaw = m[end.i][end.j].value; + + if (params.isPin()) + cout << endl << print::printPathShape(back.path, end, (int)A.size() + 1, (int)B.size() + 1); + + return back; } template<class T> -warpPath dtw::backtrack2(vtr2<node<T>> const &m, size_t i, size_t j, parameter const ¶ms) +warpPath dtw::backtrack(vtr2<node<T>> const &m, size_t i, size_t j, parameter const ¶ms) { - warpPath wp; - double ratio = j / static_cast<double>(i); - - while (i > 0 && j > 0) - { - wp.sumScore += m[i][j].value; - - double u = m[i - 1][j].value; - double l = m[i][j - 1].value; - double d = m[i - 1][j - 1].value; - if (min({ d, u, l }) == d) - { - wp.path = "M" + wp.path; - i--; - j--; - } - else - { - if (l < u) - { - wp.path = "L" + wp.path; - j--; - } - else if (u < l) - { - wp.path = "U" + wp.path; - i--; - } - else - { - if (i * ratio > 1) - { - wp.path = "U" + wp.path; - i--; - } - else - { - wp.path = "L" + wp.path; - j--; - } - } - } - } - - while (i > params.relaxation) - { - wp.path = "U" + wp.path; - i--; - } - - while (j > params.relaxation) - { - wp.path = "L" + wp.path; - j--; - } - - return wp; + warpPath wp; + double ratio = j / static_cast<double>(i); + + while (i > 0 && j > 0) + { + double u = m[i - 1][j].value; + double l = m[i][j - 1].value; + double d = m[i - 1][j - 1].value; + if (min({ d, u, l }) == d) + { + wp.path = "M" + wp.path; + i--; + j--; + } + else + { + if (l < u) + { + wp.path = "L" + wp.path; + j--; + } + else if (u < l) + { + wp.path = "U" + wp.path; + i--; + } + else + { + if (i * ratio > 1) + { + wp.path = "U" + wp.path; + i--; + } + else + { + wp.path = "L" + wp.path; + j--; + } + } + } + } + + while (i > params.relaxation) + { + wp.path = "U" + wp.path; + i--; + } + + while (j > params.relaxation) + { + wp.path = "L" + wp.path; + j--; + } + + return wp; } template<class T> -coords dtw::findMinEnd(vtr2<node<T>> const &m, int relaxation) +coords dtw::findRelaxedEnd(vtr2<node<T>> const &m, int relaxation) { - double tmp = numeric_limits<double>::max(); - coords min; - - int lenA = (int)m.size() - 1; - int lenB = (int)m[0].size() - 1; - - for (size_t i = m.size() - relaxation - 1; i < m.size(); i++) - { - if (m[i][lenB].value < tmp) - { - tmp = m[i][lenB].value; - min.i = (int)i; - min.j = lenB; - } - } - - for (size_t i = m[0].size() - relaxation - 1; i < m[0].size(); i++) - { - if (m[lenA][i].value < tmp) - { - tmp = m[lenA][i].value; - min.i = lenA; - min.j = (int)i; - } - } - - return min; + double tmp = numeric_limits<double>::max(); + coords min; + + int lenA = (int)m.size() - 1; + int lenB = (int)m[0].size() - 1; + + for (size_t i = m.size() - relaxation - 1; i < m.size(); i++) + { + if (m[i][lenB].value < tmp) + { + tmp = m[i][lenB].value; + min.i = (int)i; + min.j = lenB; + } + } + + for (size_t i = m[0].size() - relaxation - 1; i < m[0].size(); i++) + { + if (m[lenA][i].value < tmp) + { + tmp = m[lenA][i].value; + min.i = lenA; + min.j = (int)i; + } + } + + return min; } //string dtw::log(result const &result, parameter const ¶ms) @@ -360,4 +224,141 @@ coords dtw::findMinEnd(vtr2<node<T>> const &m, int relaxation) // str.append(Print::PrintDistanceM(r.m));*/ // // return str; +//} + +//template<class T> +//warpPath dtw::createMatrix(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) +//{ +// node<T>** m = new node<T>*[A.size() + 1]; +// for (int i = 0; i < A.size() + 1; i++) +// m[i] = new node<T>[B.size() + 1]; +// +// m[0][0].value = 0; +// +// int w = (int)(B.size() * params.w); +// for (int i = 1; i < A.size() + 1; i++) //row - y +// { +// int start = max(1, (int)(ceil((i - 1) * (B.size() / (double)A.size() + 0.0000000001)) - w)); +// int end = min((int)B.size() + 1, (int)(ceil(i * B.size() / (double)A.size()) + 1) + w); +// for (int j = start; j < end; j++) //col - x +// { +// double u = m[i - 1][j].value; +// double l = m[i][j - 1].value; +// double d = m[i - 1][j - 1].value; +// //double minim = min({ u, l, d }); +// +// //double minim = min({ m[i - 1][j].value, m[i][j - 1].value, m[i - 1][j - 1].value }); +// double minim = 0; +// if (l < u) +// minim = l; +// else +// minim = u; +// +// if (minim > d) +// minim = d; +// +// m[i][j].value = static_cast<T>(calcul::distance_dtw(A[i - 1], B[j - 1]) + minim); +// +// /* if(m[i - 1][j].value > m[i][j].value && m[i][j - 1].value > m[i][j].value && m[i - 1][j - 1].value > m[i][j].value) +// minim++; +// +// if (m[i][j].value < minim) +// minim++;*/ +// } +// } +// +// //stringstream ss; +// +// //ss << endl; +// //ss << setprecision(4); +// //for (size_t i = 0; i < 20; i++) +// //{ +// // for (size_t j = 0; j < 20; j++) +// // { +// // ss << setw(9) << m[i][j].value << " "; +// // } +// // ss << endl; +// //} +// // /*ss << setprecision(4); +// // for (size_t i = A.size() - 20; i < A.size() + 1; i++) +// // { +// // for (size_t j = B.size() - 20; j < B.size() + 1; j++) +// // { +// // ss << m[i][j].value<< " "; +// // } +// // ss << endl; +// // }*/ +// // cout << ss.str() << endl; +// +// auto backtrack = this.backtrack(m, A.size(), B.size()); +// backtrack.rawScore = m[A.size()][B.size()].value; +// +// for (int i = 0; i < A.size() + 1; i++) { +// delete[] m[i]; +// } +// delete[] m; +// +// return backtrack; +//} +// +//template<class T> +//warpPath dtw::backtrack(node<T>** const &m, size_t i, size_t j) +//{ +// warpPath wp; +// double ratio = j / static_cast<double>(i); +// +// while (i > 0 && j > 0) +// { +// wp.sumScore += m[i][j].value; +// +// double u = m[i - 1][j].value; +// double l = m[i][j - 1].value; +// double d = m[i - 1][j - 1].value; +// if (min({ d, u, l }) == d /*&& y > 1 && x > 1*/) +// { +// wp.path = "M" + wp.path; +// i--; +// j--; +// } +// else +// { +// if (l < u) +// { +// wp.path = "L" + wp.path; +// j--; +// } +// else if (u < l) +// { +// wp.path = "U" + wp.path; +// i--; +// } +// else +// { +// if (i * ratio > 1) +// { +// wp.path = "U" + wp.path; +// i--; +// } +// else +// { +// wp.path = "L" + wp.path; +// j--; +// } +// } +// } +// } +// +// while (i > 0) +// { +// wp.path = "U" + wp.path; +// i--; +// } +// +// while (j > 0) +// { +// wp.path = "L" + wp.path; +// j--; +// } +// +// return wp; //} \ No newline at end of file diff --git a/SequenceComparison/dtw.h b/SequenceComparison/dtw.h index 2bb5cbacfa629ef18dc0774bc5b367cce83260f7..0797b214137e168ae5631b7c4e2295739b505609 100644 --- a/SequenceComparison/dtw.h +++ b/SequenceComparison/dtw.h @@ -5,35 +5,36 @@ class dtw { public: - //Returns result of dtw function. - //dtw function should always be called by this method. + //Returns result of dtw function. + //dtw function should always be called by this method. static double main(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); - //Returns alignment similarity. - static double alignment(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); - //Returns 'distance matrix' for 2 input sequence. - template<class T> - static warpPath createMatrix(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); - //Returns 'warping path' generated form distance matrix. - template<class T> - static warpPath backtrack(node<T>** const &m, size_t, size_t); - - template<class T> - static warpPath createMatrix2(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); - template<class T> - static warpPath backtrack2(vtr2<node<T>> const & m, size_t i, size_t j, parameter const ¶ms); - template<class T> - static coords findMinEnd(vtr2<node<T>> const &m, int relaxation); - //Log + //Returns alignment similarity. + static double alignment(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); + //Returns 'distance matrix' for 2 input sequence. + + template<class T> + static warpPath createMatrix(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); + template<class T> + static warpPath backtrack(vtr2<node<T>> const & m, size_t i, size_t j, parameter const ¶ms); + template<class T> + static coords findRelaxedEnd(vtr2<node<T>> const &m, int relaxation); + //Log //static std::string log(result const &, parameter const &); - - //EXPERIMENT - //Returns 'distance matrix' for 2 input sequence. - //template<class T> - //static warpPath CreateMatrix2(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); - // - ////Returns 'warping path' generated form distance matrix. - //template<class T> - //static warpPath Backtrack2(T** const &m, size_t, size_t); + + //EXPERIMENT + //Returns 'distance matrix' for 2 input sequence. + //template<class T> + //static warpPath CreateMatrix2(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); + // + ////Returns 'warping path' generated form distance matrix. + //template<class T> + //static warpPath Backtrack2(T** const &m, size_t, size_t); + + //template<class T> + //static warpPath createMatrix(vtr2<double> const &, vtr2<double> const &, parameter const ¶ms); + ////Returns 'warping path' generated form distance matrix. + //template<class T> + //static warpPath backtrack(node<T>** const &m, size_t, size_t); }; diff --git a/SequenceComparison/headerStruct.h b/SequenceComparison/headerStruct.h index 05f8e256c70151617db2d9ce2a063ea5c10a0481..1531592d9720f46af182df300248410cf666044f 100644 --- a/SequenceComparison/headerStruct.h +++ b/SequenceComparison/headerStruct.h @@ -25,13 +25,13 @@ public: class warpPath { public: - warpPath() : path(""), rawScore(0), normScore(0), sumScore(0) {} + warpPath() : path(""), scoreRaw(0)/*, normScore(0), sumScore(0)*/ {} ~warpPath() {} std::string path; //path - double rawScore; - double normScore; - double sumScore; + double scoreRaw; + //double normScore; + //double sumScore; }; struct result_operation @@ -42,7 +42,7 @@ struct result_operation struct result_pdtw { - double normScore; + double scoreNorm; std::string tree; }; diff --git a/SequenceComparison/help.cpp b/SequenceComparison/help.cpp index 4060c25304386fc641eddfac829edd56b3413e6e..15764537671793eac560f3f47a29dbc653491788 100644 --- a/SequenceComparison/help.cpp +++ b/SequenceComparison/help.cpp @@ -12,11 +12,11 @@ using namespace std; void help::trimLeft(string &s, string const &delimiters) { - const size_t startpos = s.find_first_not_of(delimiters); + const size_t startpos = s.find_first_not_of(delimiters); if (string::npos != startpos) { //s = s.substr(startpos, s.end); - s.erase(s.begin(), s.begin() + startpos); + s.erase(s.begin(), s.begin() + startpos); } } @@ -26,79 +26,79 @@ void help::trimRight(string &s, string const &delimiters) if (string::npos != endpos) { //s = s.substr(0, endpos + 1); - s.erase(s.begin() + endpos + 1, s.end()); + s.erase(s.begin() + endpos + 1, s.end()); } } void help::trim(string &s, string const &delimiters) { - trimLeft(s, delimiters); - trimRight(s, delimiters); + trimLeft(s, delimiters); + trimRight(s, delimiters); } vector<string> help::split(string const& s, char const *d) { - vector<string> output; - - bitset<255> delims; - while (*d) - { - unsigned char code = *d++; - delims[code] = true; - } - - string::const_iterator beg; - bool in_token = false; - for (string::const_iterator it = s.begin(), end = s.end(); it != end; ++it) - { - if (delims[*it]) - { - if (in_token) - { - //output.push_back(beg, it); - output.push_back(vector<string>::value_type(beg, it)); - in_token = false; - } - } - else if (!in_token) - { - beg = it; - in_token = true; - } - } - if (in_token) - output.push_back(vector<string>::value_type(beg, s.end())); - - return output; + vector<string> output; + + bitset<255> delims; + while (*d) + { + unsigned char code = *d++; + delims[code] = true; + } + + string::const_iterator beg; + bool in_token = false; + for (string::const_iterator it = s.begin(), end = s.end(); it != end; ++it) + { + if (delims[*it]) + { + if (in_token) + { + //output.push_back(beg, it); + output.push_back(vector<string>::value_type(beg, it)); + in_token = false; + } + } + else if (!in_token) + { + beg = it; + in_token = true; + } + } + if (in_token) + output.push_back(vector<string>::value_type(beg, s.end())); + + return output; } vector<string> help::split1(const string &str, const string &delimiter) { - vector<string> tokens; - size_t prev = 0, pos = 0; - do - { - pos = str.find(delimiter, prev); - if (pos == string::npos) pos = str.length(); - string token = str.substr(prev, pos - prev); - if (!token.empty()) tokens.push_back(token); - prev = pos + delimiter.length(); - } while (pos < str.length() && prev < str.length()); - - return tokens; + vector<string> tokens; + size_t prev = 0, pos = 0; + do + { + pos = str.find(delimiter, prev); + if (pos == string::npos) pos = str.length(); + string token = str.substr(prev, pos - prev); + if (!token.empty()) tokens.push_back(token); + prev = pos + delimiter.length(); + } while (pos < str.length() && prev < str.length()); + + return tokens; } vector<string> help::split2(string const &s, string const &delimiters) { - vtr<std::string> result; - std::string::size_type pos = 0; - while (std::string::npos != (pos = s.find_first_not_of(delimiters, pos))) { - auto pos2 = s.find_first_of(delimiters, pos); - result.emplace_back(s.substr(pos, std::string::npos == pos2 ? pos2 : pos2 - pos)); - pos = pos2; - } - - return result; + vtr<std::string> result; + std::string::size_type pos = 0; + while (std::string::npos != (pos = s.find_first_not_of(delimiters, pos))) { + auto pos2 = s.find_first_of(delimiters, pos); + result.emplace_back(s.substr(pos, std::string::npos == pos2 ? pos2 : pos2 - pos)); + pos = pos2; + } + + return result; } vtr2<double> help::convertToDouble(vtr2<string> const &strInput) @@ -128,243 +128,232 @@ vector<double> help::convertToDouble(vector<string> const &strInput) void help::correctBomLine(string s) { - if (s.compare(0, 3, "\xEF\xBB\xBF") == 0) // Is the file marked as UTF-8? - { - s.erase(0, 3); // Now get rid of the BOM. - } - else if (s.compare(0, 2, "\xFE\xFF") == 0) // Is the file marked as UTF-16 BE? - { - s.erase(0, 2); // Now get rid of the BOM. - } - else if (s.compare(0, 2, "\xFF\xFE") == 0) // Is the file marked as UTF-16 LE - { - s.erase(0, 2); // Now get rid of the BOM. - } - else if (s.compare(0, 4, "\x00\x00\xFE\xFF") == 0) // Is the file marked as UTF-32 BE? - { - s.erase(0, 4); // Now get rid of the BOM. - } - else if (s.compare(0, 4, "\xFF\xFE\x00\x00") == 0) // Is the file marked as UTF-32 LE? - { - s.erase(0, 4); // Now get rid of the BOM. - } + if (s.compare(0, 3, "\xEF\xBB\xBF") == 0) // Is the file marked as UTF-8? + { + s.erase(0, 3); // Now get rid of the BOM. + } + else if (s.compare(0, 2, "\xFE\xFF") == 0) // Is the file marked as UTF-16 BE? + { + s.erase(0, 2); // Now get rid of the BOM. + } + else if (s.compare(0, 2, "\xFF\xFE") == 0) // Is the file marked as UTF-16 LE + { + s.erase(0, 2); // Now get rid of the BOM. + } + else if (s.compare(0, 4, "\x00\x00\xFE\xFF") == 0) // Is the file marked as UTF-32 BE? + { + s.erase(0, 4); // Now get rid of the BOM. + } + else if (s.compare(0, 4, "\xFF\xFE\x00\x00") == 0) // Is the file marked as UTF-32 LE? + { + s.erase(0, 4); // Now get rid of the BOM. + } } void help::interpolate(vtr3<double> &input) { - int maxLen = -1; - - for (auto const &s : input) - { - if ((int)s.size() > maxLen) - maxLen = (int)s.size(); - } - - for (size_t i = 0; i < input.size(); i++) - { - int diff = maxLen - (int)input[i].size(); - int half = maxLen / 2; - - while (diff > 0) - { - int c = 0; - vtr2<double> row; - for(size_t k = 0; k < input[i].size() - 1 && diff > 0; k++) - { - vector<double> el; - - if (k % 2 == 1) - { - for (size_t j = 0; j < input[i][k].size(); j++) - { - double tmp = (input[i][c - 1][j] + input[i][c][j]) / 2.0; - el.push_back(tmp); - } - - row.push_back(el); - diff--; - } - else - { - row.push_back(input[i][c]); - c++; - } - - } - row.insert(row.end(), input[i].begin() + c, input[i].end()); - input[i] = row; - } - } + int maxLen = -1; + + for (auto const &s : input) + { + if ((int)s.size() > maxLen) + maxLen = (int)s.size(); + } + + for (size_t i = 0; i < input.size(); i++) + { + int diff = maxLen - (int)input[i].size(); + int half = maxLen / 2; + + while (diff > 0) + { + int c = 0; + vtr2<double> row; + for(size_t k = 0; k < input[i].size() - 1 && diff > 0; k++) + { + vector<double> el; + + if (k % 2 == 1) + { + for (size_t j = 0; j < input[i][k].size(); j++) + { + double tmp = (input[i][c - 1][j] + input[i][c][j]) / 2.0; + el.push_back(tmp); + } + + row.push_back(el); + diff--; + } + else + { + row.push_back(input[i][c]); + c++; + } + + } + row.insert(row.end(), input[i].begin() + c, input[i].end()); + input[i] = row; + } + } } void help::normalizeMany(vtr3<double> &input) { - for (int i = 0; i < input.size(); i++) //dims - { - normalize(input[i]); - } + for (int i = 0; i < input.size(); i++) //dims + { + normalize(input[i]); + } } void help::normalize(vtr2<double> &input) { - for (int i = 0; i < input[0].size(); i++) //dims - { - double mean = 0; - for (int j = 0; j < input.size(); j++) //lenght of sequence - { - mean += input[j][i]; - } - mean /= input.size(); - mean = abs(mean); - - for (int j = 0; j < input.size(); j++) //lenght of sequence - { - input[j][i] = input[j][i] / mean; - } - } + for (int i = 0; i < input[0].size(); i++) //dims + { + double mean = 0; + for (int j = 0; j < input.size(); j++) //lenght of sequence + { + mean += input[j][i]; + } + mean /= input.size(); + mean = abs(mean); + + for (int j = 0; j < input.size(); j++) //lenght of sequence + { + input[j][i] = input[j][i] / mean; + } + } } void help::normalizeZeroOne(vtr3<double> &input, double max) { - for (size_t i = 0; i < input.size(); i++) - { - for (size_t j = 0; j < input[i].size(); j++) - { - for (size_t k = 0; k < input[i][j].size(); k++) - { - input[i][j][k] /= max; - } - } - } + for (size_t i = 0; i < input.size(); i++) + { + for (size_t j = 0; j < input[i].size(); j++) + { + for (size_t k = 0; k < input[i][j].size(); k++) + { + input[i][j][k] /= max; + } + } + } } vtr3<double> help::separateSequence(vtr3<double> const &input, int size) { - vtr3<double> output; + vtr3<double> output; - for (size_t i = 0; i < size; i++) - { - auto tmp = separateSequenceOne(input[i]); - output.insert(output.end(), tmp.begin(), tmp.end()); - } + for (size_t i = 0; i < size; i++) + { + auto tmp = separateSequenceOne(input[i]); + output.insert(output.end(), tmp.begin(), tmp.end()); + } - return output; + return output; } vtr3<double> help::separateSequenceOne(vtr2<double> const &input) { - vtr3<double> output; - - const size_t dims = input[0].size(); - - for (size_t i = 0; i < dims; i++) - { - vtr2<double> sequence; - sequence.reserve(input.size()); - for (size_t j = 0; j < input.size(); j++) - { - vector<double> el(1); - el[0] = input[j][i]; - sequence.push_back(el); - } - output.push_back(sequence); - } - - return output; + vtr3<double> output; + + const size_t dims = input[0].size(); + + for (size_t i = 0; i < dims; i++) + { + vtr2<double> sequence; + sequence.reserve(input.size()); + for (size_t j = 0; j < input.size(); j++) + { + vector<double> el(1); + el[0] = input[j][i]; + sequence.push_back(el); + } + output.push_back(sequence); + } + + return output; } -void help::reduce(vtr3<double> &input, string const &skip) +void help::reduce(vtr3<double> &input, int skip) { - vtr3<double> output(input.size()); - - for (size_t i = 0; i < input.size(); i++) - { - for (int j = 0; j < skip.size(); j++) - { - vtr2<double> s; - - int c = 1; - int remove = skip.at(j) - '0'; - for (int k = 0; k < input[i].size(); k++) //lenght of sequence - { - if (c % remove != 0) - s.push_back(input[i][k]); - - c++; - } - input[i] = s; - } - } + for (size_t i = 0; i < input.size(); i++) + { + vtr2<double> row; + for (int j = skip - 1; j < input[i].size(); j += skip) + { + row.push_back(input[i][j]); + } + input[i] = row; + } } vtr3<double> help::paa(vtr3<double> const &input, int ratio) { - vtr3<double> output(input.size()); - - for (size_t i = 0; i < input.size(); i++) - { - vtr2<double> s; - for (int j = 0; j < input[i].size(); j+= ratio)// sequence - { - vector<double> dim(input[i][j].size()); - - const size_t end = j + ratio >= input[i].size() ? input[i].size() : j + ratio; - for (size_t k = 0; k < input[i][j].size(); k++) //all dims - { - double sum = 0; - int merged = 0; - for (size_t l = j; l < end; l++) //sum individual groups of dims - { - sum += input[i][l][k]; - merged++; - } - dim[k] = sum / merged; - } - s.push_back(dim); - } - output[i] = s; - } - - return output; + vtr3<double> output(input.size()); + + for (size_t i = 0; i < input.size(); i++) + { + vtr2<double> s; + for (int j = 0; j < input[i].size(); j+= ratio)// sequence + { + vector<double> dim(input[i][j].size()); + + const size_t end = j + ratio >= input[i].size() ? input[i].size() : j + ratio; + for (size_t k = 0; k < input[i][j].size(); k++) //all dims + { + double sum = 0; + int merged = 0; + for (size_t l = j; l < end; l++) //sum individual groups of dims + { + sum += input[i][l][k]; + merged++; + } + dim[k] = sum / merged; + } + s.push_back(dim); + } + output[i] = s; + } + + return output; } vtr3<double> help::smooth(vtr3<double> const & input, int width) { - vtr3<double> output(input.size()); - - for (size_t i = 0; i < input.size(); i++) - { - const int dims = (int)input[0][0].size(); - - vtr2<double> s; - for (size_t j = 0; j < width - 1; j++) - { - s.push_back(input[i][j]); - } - - for (int j = 0; j < input[i].size() - width + 1; j++)// sequence - { - vtrD sums(dims); - for (size_t k = 0; k < width; k++) //all dims - { - for (size_t l = 0; l < dims; l++) - { - sums[l] += input[i][j + k][l] / width; - } - } - s.push_back(sums); - } - output[i] = s; - } - - return output; + vtr3<double> output(input.size()); + + for (size_t i = 0; i < input.size(); i++) + { + const int dims = (int)input[0][0].size(); + + vtr2<double> s; + for (size_t j = 0; j < width - 1; j++) + { + s.push_back(input[i][j]); + } + + for (int j = 0; j < input[i].size() - width + 1; j++)// sequence + { + vtrD sums(dims); + for (size_t k = 0; k < width; k++) //all dims + { + for (size_t l = 0; l < dims; l++) + { + sums[l] += input[i][j + k][l] / width; + } + } + s.push_back(sums); + } + output[i] = s; + } + + return output; } std::string help::stripFileNameFromPath(std::string path) { - string folder = path.substr(0, path.find_last_of("\\/")); + string folder = path.substr(0, path.find_last_of("\\/")); - return folder; + return folder; } //template<class T> diff --git a/SequenceComparison/help.h b/SequenceComparison/help.h index b803313ca94607cbc0d8da0d8aa8e869d60dd072..4710254cbb145f1a787ac9da442606b768a0e2a7 100644 --- a/SequenceComparison/help.h +++ b/SequenceComparison/help.h @@ -29,7 +29,7 @@ public: static void normalizeZeroOne(vtr3<double> &input, double max); //Returns shortened input sequence by skiping some of their elements. - static void reduce(vtr3<double> &input, std::string const &skip); + static void reduce(vtr3<double> &input, int skip); //Returns piecewise aggregate approximation of sequence. static vtr3<double> paa(vtr3<double> const &input, int ratio); diff --git a/SequenceComparison/lcss.cpp b/SequenceComparison/lcss.cpp index 5e76f45c81e0fae9a03c6b3f6039f871a3acfcc4..69bcf15389ecd3cb87b63386c5e7de33868052de 100644 --- a/SequenceComparison/lcss.cpp +++ b/SequenceComparison/lcss.cpp @@ -11,131 +11,134 @@ using namespace std; double lcss::main(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { - auto result = alignment(A, B, params); + auto result = alignment(A, B, params); - //result.input = input; - //result.log = Log(result, p.startStamp); //log fces disabled currently + //result.input = input; + //result.log = Log(result, p.startStamp); //log fces disabled currently - return result; + return result; } double lcss::alignment(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { - warpPath backtrack; - if (params.dmDataType == "double") - backtrack = lcss::createMatrix<double> (A, B, params); - else if (params.dmDataType == "int") - backtrack = lcss::createMatrix<int>(A, B, params); - else if (params.dmDataType == "float") - backtrack = lcss::createMatrix<float>(A, B, params); - else if (params.dmDataType == "char" || params.dmDataType == "byte") - backtrack = lcss::createMatrix<char>(A, B, params); - - double result = 0; - if (params.scoreType == 1) - result = 1 - backtrack.rawScore / backtrack.path.size(); - else if (params.scoreType == 2) - result = backtrack.normScore; - else - result = backtrack.rawScore; - - if (params.isRatioReversed()) - return 1 - result; - else - return result; + warpPath backtrack; + if (params.dmDataType == "double") + backtrack = lcss::createMatrix<double> (A, B, params); + else if (params.dmDataType == "int") + backtrack = lcss::createMatrix<int>(A, B, params); + else if (params.dmDataType == "float") + backtrack = lcss::createMatrix<float>(A, B, params); + else if (params.dmDataType == "char" || params.dmDataType == "byte") + backtrack = lcss::createMatrix<char>(A, B, params); + + double result = 0; + if (params.scoreType == 1) + result = calcul::scorePair_lcss_s1(backtrack.scoreRaw, backtrack.path.size()); + //result = 1 - backtrack.rawScore / backtrack.path.size(); + else if (params.scoreType == 2) + result = calcul::scorePair_lcss_s1(backtrack.scoreRaw, min(A.size(), B.size())); + else if (params.scoreType == 3) + result = backtrack.scoreRaw; + else + result = 0; + + if (params.isRatioReversed()) + return 1 - result; + else + return result; } template<class T> warpPath lcss::createMatrix(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { - Vtr<node<T>, 2> m(A.size() + 1); - for (int i = 0; i < A.size() + 1; i++) - m[i] = vector<node<T>>(B.size() + 1, node<T>(0)); - - double left = 0; - double up = 0; - const int w = (int)(B.size() * params.w); - for (int i = 1; i < A.size() + 1; i++) //radky - y - { - //int start = max(1, (int)(ceil((i - 1) * ((double)B.size() / (double)A.size() + 0.0000000001)) - w)); - //int end = min((int)B.size() + 1, (int)(ceil(i * (double)B.size() / (double)A.size()) + 1) + w); - for (int j = 1; j < B.size() + 1; j++) //sloupce - x - { - left = m[i][j - 1].value; - up = m[i - 1][j].value; - - bool epsed = true; - int shorterDim = (int)min(A[i - 1].size(), B[j - 1].size()); - for (int u = 0; u < shorterDim; u++) - { - if (calcul::distance_lcss(A[i - 1], B[j - 1], u) > params.epsilon) - epsed = false; - } - - if (epsed && abs(i - j) <= params.delta) //if i-1,j-1 <= l,u ... diag - m[i][j].value = m[i - 1][j - 1].value + 1; - else if (left >= up) - m[i][j].value = static_cast<T>(left); //(UP) jedno kterej - else - m[i][j].value = static_cast<T>(up); //(UP < LEFT) - } - } - - return backtrack(m, A, B); + Vtr<node<T>, 2> m(A.size() + 1); + for (int i = 0; i < A.size() + 1; i++) + m[i] = vector<node<T>>(B.size() + 1, node<T>(0)); + + double left = 0; + double up = 0; + const int w = (int)(B.size() * params.w); + for (int i = 1; i < A.size() + 1; i++) //radky - y + { + //int start = max(1, (int)(ceil((i - 1) * ((double)B.size() / (double)A.size() + 0.0000000001)) - w)); + //int end = min((int)B.size() + 1, (int)(ceil(i * (double)B.size() / (double)A.size()) + 1) + w); + for (int j = 1; j < B.size() + 1; j++) //sloupce - x + { + left = m[i][j - 1].value; + up = m[i - 1][j].value; + + bool epsed = true; + int shorterDim = (int)min(A[i - 1].size(), B[j - 1].size()); + for (int u = 0; u < shorterDim; u++) + { + if (calcul::distance_lcss(A[i - 1], B[j - 1], u) > params.epsilon) + epsed = false; + } + + if (epsed && abs(i - j) <= params.delta) //if i-1,j-1 <= l,u ... diag + m[i][j].value = m[i - 1][j - 1].value + 1; + else if (left >= up) + m[i][j].value = static_cast<T>(left); //(UP) jedno kterej + else + m[i][j].value = static_cast<T>(up); //(UP < LEFT) + } + } + + return backtrack(m, A, B); } template<class T> warpPath lcss::backtrack(vector<vector<node<T>>> const &m, vtr2<double> const &A, vtr2<double> const &B) { - int i = (int)A.size(); - int j = (int)B.size(); - string path = ""; - - while (i > 0 && j > 0) - { - if (m[i - 1][j - 1].value < m[i][j].value) - { - if (to_string(A[i - 1][0]) == "-" || to_string(B[j - 1][0]) == "-") - path = "M" + path; - else if (A[i - 1][0] == B[j - 1][0]) - path = "M" + path; - else - path = "S" + path; - - i--; - j--; - } - else - { - if (m[i - 1][j].value >= m[i][j - 1].value) - { - path = "U" + path; - i--; - } - else/* if (m[i][j - 1].value == m[i][j].size)*/ - { - path = "L" + path; - j--; - } - } - } - - while (i > 0) - { - path = "U" + path; - i--; - } - - while (j > 0) - { - path = "L" + path; - j--; - } - - warpPath wp; - wp.path = path; - wp.rawScore = m[A.size()][B.size()].value; - wp.normScore = 1 - calcul::getPairRatio_lcss((int)(A.size() + B.size()), (int)wp.rawScore); + int i = (int)A.size(); + int j = (int)B.size(); + string path = ""; + + while (i > 0 && j > 0) + { + if (m[i - 1][j - 1].value < m[i][j].value) + { + if (to_string(A[i - 1][0]) == "-" || to_string(B[j - 1][0]) == "-") + path = "M" + path; + else if (A[i - 1][0] == B[j - 1][0]) + path = "M" + path; + else + path = "S" + path; + + i--; + j--; + } + else + { + if (m[i - 1][j].value >= m[i][j - 1].value) + { + path = "U" + path; + i--; + } + else/* if (m[i][j - 1].value == m[i][j].size)*/ + { + path = "L" + path; + j--; + } + } + } + + while (i > 0) + { + path = "U" + path; + i--; + } + + while (j > 0) + { + path = "L" + path; + j--; + } + + warpPath wp; + wp.path = path; + wp.scoreRaw = m[A.size()][B.size()].value; + //wp.normScore = 1 - calcul::getPairRatio_lcss((int)(A.size() + B.size()), (int)wp.rawScore); - return wp; + return wp; } \ No newline at end of file diff --git a/SequenceComparison/main.cpp b/SequenceComparison/main.cpp index f74b9f9cf0159e0419f0a9d67df6ba206288914b..0133526672761144222231073f7e98764a874a77 100644 --- a/SequenceComparison/main.cpp +++ b/SequenceComparison/main.cpp @@ -21,108 +21,113 @@ void mainLogic(vtrS const &args); int main(int argc, char* argv[]) { - vector<string> args; - for (size_t i = 1; i < argc; i++) - args.push_back(argv[i]); - - vector<string> scripts; - for (size_t i = 0; i < args.size() - 1; i++) - { - if (args[i] == "-script") - { - scripts = dataParser::readFileByLine(args[i + 1]); - break; - } - } - - if(scripts.size() > 0) - { - size_t start = args.size() >= 3 ? stoi(args[2]) - 1 : 0; - size_t end = args.size() == 4 ? stoi(args[3]) : scripts.size(); - for (size_t i = start; i < end; i++) - { - mainLogic(help::split(scripts[i], " \t")); - } - } - else - mainLogic(args); - - return 0; + vector<string> args; + for (size_t i = 1; i < argc; i++) + args.push_back(argv[i]); + + vector<string> scripts; + for (size_t i = 0; i < args.size() - 1; i++) + { + if (args[i] == "-script") + { + scripts = dataParser::readFileByLine(args[i + 1]); + break; + } + } + + if(scripts.size() > 0) + { + //size_t start = args.size() >= 3 ? stoi(args[2]) - 1 : 0; + //size_t end = args.size() == 4 ? stoi(args[3]) : scripts.size(); + for (size_t i = 0; i < scripts.size(); i++) + { + auto argsSub = help::split(scripts[i], " \t"); + parameter::parameterPriority(argsSub, args); + mainLogic(argsSub); + } + } + else + mainLogic(args); + + return 0; } void mainLogic(vtrS const &args) { - auto params = parameter::setParameters(args); - - chrono::steady_clock::time_point begin = chrono::steady_clock::now(); - - //INPUT DATA PARSING - - vtr<string> files; - vtr3<double> input; - map<int, int> clusters; - try{ - files = dataParser::getFiles(params.inPath); - input = dataParser::readData(params.inPath); - cout << print::printElapsed("parsing", begin) << endl; - - if(params.isClusterInfo()) - clusters = dataParser::getClusters(params.clusterInfoPath); - } - catch (exception const &e) - { - cout << e.what() << endl; - } - - if (params.pin) - { - cout << print::printVector(files); - cout << print::printInput(input, 30) << endl; // print for debug comment or delete if not needed - } - - //DATA PREPROCESSING - begin = chrono::steady_clock::now(); - - if (params.isReduce()) - help::reduce(input, params.reduce); - if (params.isPaa()) - input = help::paa(input, params.paa); - if (params.znormalize) - help::normalizeMany(input); - if (params.interpolate) - help::interpolate(input); - if (params.isNormalizeZeroOne()) - help::normalizeZeroOne(input, params.normalize); - if (params.isSmooth()) - input = help::smooth(input, params.smooth); - - cout << print::printElapsed("preprocesing", begin) << endl; - - if(params.pin) - cout << print::printInput(input, 30) << endl; // print for debug comment or delete if not needed - - params.startStamp = chrono::steady_clock::now(); - - auto result = operation::main(input, clusters, params); - - if(params.isOutput()){ - print::write(print::printMatrix(result.matrixSimilarity, params), params.outputPath, false); - } - - if (params.isHtml()) { - print::write(print::printParameterString(args) + "</br>" + - print::printMapRatios(calcul::getMAPratio(result.matrixCluster, clusters)) + - print::printHtmlClusters(input, result.matrixCluster, clusters), params.outputPath, true); - } - - if (params.isGdf()) - print::write(print::printGdf(files, input, result.matrixSimilarity, clusters), params.gdf, false); - - if (params.isOmp()) { - if(result.matrixSimilarity.size() > 0) - cout << print::printMatrix(result.matrixSimilarity, params); - if (result.matrixCluster.size() > 0) - cout << print::printMatrix(result.matrixCluster, params); - } + auto params = parameter::setParameters(args); + + auto begin = chrono::steady_clock::now(); + + //INPUT DATA PARSING + + vtr<string> files; + vtr3<double> input; + map<int, int> clusters; + try{ + files = dataParser::getFiles(params.inPath); + input = dataParser::readData(params.inPath); + if(params.isTime()) + cout << print::printElapsed("parsing", begin) << endl; + + if(params.isClusterInfo()) + clusters = dataParser::getClusters(params.clusterInfoPath); + } + catch (exception const &e) + { + cout << e.what() << endl; + } + + if (params.pin) + { + cout << print::printVector(files); + cout << print::printInput(input, 30) << endl; // print for debug comment or delete if not needed + } + + //DATA PREPROCESSING + begin = chrono::steady_clock::now(); + + if (params.isReduce()) + help::reduce(input, params.reduce); + if (params.isPaa()) + input = help::paa(input, params.paa); + if (params.znormalize) + help::normalizeMany(input); + if (params.interpolate) + help::interpolate(input); + if (params.isNormalizeZeroOne()) + help::normalizeZeroOne(input, params.normalize); + if (params.isSmooth()) + input = help::smooth(input, params.smooth); + + if(params.isTime()) + cout << print::printElapsed("preprocesing", begin) << endl; + + if(params.pin) + cout << print::printInput(input, 30) << endl; // print for debug comment or delete if not needed + + params.startStamp = chrono::steady_clock::now(); + + auto result = operation::main(input, clusters, params); + + if(params.isWriteOutput()){ + print::write(print::printMatrix(result.matrixSimilarity, params), params.outputPath, false); + } + + if (params.isPrintOutput()) { + if(result.matrixSimilarity.size() > 0) + cout << print::printMatrix(result.matrixSimilarity, params); + if (result.matrixCluster.size() > 0) + cout << print::printMatrix(result.matrixCluster, params); + } + + if (params.isHtml()) { + print::write(print::printParameterString(args) + "</br>" + + print::printMapRatios(calcul::getMAPratio(result.matrixCluster, clusters)) + + print::printHtmlClusters(input, result.matrixCluster, clusters), params.outputPath, true); + } + + if (params.isGdf()) + print::write(print::printGdf(files, input, result.matrixSimilarity, clusters), params.gdf, false); + } diff --git a/SequenceComparison/operation.cpp b/SequenceComparison/operation.cpp index a1983ed14b2399591c8ea4940adaf041844ffaa9..8cb872435f5e4ed232cbafbb94dc9b658f27415d 100644 --- a/SequenceComparison/operation.cpp +++ b/SequenceComparison/operation.cpp @@ -19,350 +19,361 @@ using namespace std; result_operation operation::main(vtr3<double> const &input, mapInt const &clusters, parameter const ¶ms) { - FUNC f1 = dtw::main; - FUNCM f2 = pdtw::main; - - if (params.method == "lcss") - f1 = lcss::main; - - result_operation result; - - switch (params.operation) - { - case eOperation::similarityMatrix: - result = params.isOmp() ? getSimilarityMatrix_omp(input, params, f1) : getSimilarityMatrix(input, params, f1); - break; - case eOperation::clusterMatrix: - result = params.isOmp() ? getClusters_omp(input, params, clusters, f1) : getClusters(input, params, clusters, f1); - break; - case eOperation::dimSimilarityMatrix: - result = params.isOmp() ? getSimilarityMatrix_dimensions_omp(input, params, f1) : getSimilarityMatrix_dimensions(input, params, f1); - break; - case eOperation::bestDimSimMatrix: - result = params.isOmp() ? getBestSimilarityMatrix_dimensions_omp(input, params, f1) : getBestSimilarityMatrix_dimensions(input, params, f1); - break; - case eOperation::pdtw: - pdtw::main(input, params); - break; - default: - //throw runtime_error("error: Operation not found"); - cout << ("error: Operation not found"); - exit(0); - break; - } - - return result; + FUNC f1 = dtw::main; + FUNCM f2 = pdtw::main; + + if (params.method == "lcss") + f1 = lcss::main; + + result_operation result; + + switch (params.operation) + { + case eOperation::similarityMatrix: + result = params.isOmp() ? getSimilarityMatrix_omp(input, params, f1) : getSimilarityMatrix(input, params, f1); + break; + case eOperation::clusterMatrix: + result = params.isOmp() ? getClusters_omp(input, params, clusters, f1) : getClusters(input, params, clusters, f1); + break; + case eOperation::dimSimilarityMatrix: + result = params.isOmp() ? getSimilarityMatrix_dimensions_omp(input, params, f1) : getSimilarityMatrix_dimensions(input, params, f1); + break; + case eOperation::bestDimSimMatrix: + result = params.isOmp() ? getBestSimilarityMatrix_dimensions_omp(input, params, f1) : getBestSimilarityMatrix_dimensions(input, params, f1); + break; + case eOperation::pdtw: + pdtw::main(input, params); + break; + default: + //throw runtime_error("error: Operation not found"); + cout << ("error: Operation not found"); + exit(0); + break; + } + + return result; } result_operation operation::getSimilarityMatrix(vtr3<double> const &input, parameter const ¶ms, FUNC f) { - vtr2<double> matrix(input.size()); - for (size_t i = 0; i < input.size(); i++) - { - vector<double> simRow(input.size()); - matrix[i] = simRow; - } - - for (size_t i = 0; i < input.size(); i++) - cout << setw(3 + params.precision) << i; - - cout << setprecision(params.precision) << endl; - for (size_t i = 0; i < input.size(); i++) - { - - cout << setw(3) << i + 1 <<" "; - for (size_t j = 0; j < input.size(); j++) - { - double tmp = 0; - if (i >= j) - { - tmp = f(input[i], input[j], params); - cout << setw(params.precision + 3) << fixed << tmp; - matrix[i][j] = tmp; - matrix[j][i] = tmp; - } - else - cout << string(params.precision + 3, ' '); - } - cout << endl; - } - - cout << print::printElapsed("elapsed", params.startStamp) << endl; - - result_operation result; - result.matrixSimilarity = matrix; - - return result; + vtr2<double> matrix(input.size()); + for (size_t i = 0; i < input.size(); i++) + { + vector<double> simRow(input.size()); + matrix[i] = simRow; + } + + for (size_t i = 0; i < input.size(); i++) + cout << setw(3 + params.precision) << i; + + cout << setprecision(params.precision) << endl; + for (size_t i = 0; i < input.size(); i++) + { + + cout << setw(3) << i + 1 <<" "; + for (size_t j = 0; j < input.size(); j++) + { + double tmp = 0; + if (i >= j) + { + tmp = f(input[i], input[j], params); + cout << setw(params.precision + 3) << fixed << tmp; + matrix[i][j] = tmp; + matrix[j][i] = tmp; + } + else + cout << string(params.precision + 3, ' '); + } + cout << endl; + } + + if (params.isTime()) + cout << print::printElapsed("elapsed", params.startStamp) << endl; + + result_operation result; + result.matrixSimilarity = matrix; + + return result; } result_operation operation::getSimilarityMatrix_omp(vtr3<double> const &input, parameter const ¶ms, FUNC f) { - vtr2<double> matrix; - vector<double> simRow(input.size()); - for (size_t i = 0; i < input.size(); i++) - { - matrix.push_back(simRow); - } - - //cout << thread::hardware_concurrency() << endl; - //cout << omp_get_num_procs() << endl; - //cout << omp_get_num_threads() << endl; - //cout << omp_get_num_devices() << endl; - - int size = (int)(input.size() * (input.size() + 1) / 2); - omp_set_num_threads(params.omp); - #pragma omp parallel for schedule(static, 1) /*num_threads(params.threads)*//* proc_bind(spread)*/ - for (int g = 1; g < /*input.size() * input.size()*/size + 1; g++) - { - int i = (int)ceil(sqrt(2 * g + 0.25) - 0.5); - int j = g - i * (i + 1) / 2 + i; - i--; - j--; - - matrix[i][j] = f(input[i], input[j], params); - matrix[j][i] = matrix[i][j]; - } - - cout << print::printElapsed("elapsed", params.startStamp) << endl; - - result_operation result; - result.matrixSimilarity = matrix; - - return result; + vtr2<double> matrix; + vector<double> simRow(input.size()); + for (size_t i = 0; i < input.size(); i++) + { + matrix.push_back(simRow); + } + + //cout << thread::hardware_concurrency() << endl; + //cout << omp_get_num_procs() << endl; + //cout << omp_get_num_threads() << endl; + //cout << omp_get_num_devices() << endl; + + int size = (int)(input.size() * (input.size() + 1) / 2); + omp_set_num_threads(params.omp); + #pragma omp parallel for schedule(static, 1) /*num_threads(params.threads)*//* proc_bind(spread)*/ + for (int g = 1; g < /*input.size() * input.size()*/size + 1; g++) + { + int i = (int)ceil(sqrt(2 * g + 0.25) - 0.5); + int j = g - i * (i + 1) / 2 + i; + i--; + j--; + + matrix[i][j] = f(input[i], input[j], params); + matrix[j][i] = matrix[i][j]; + } + + if (params.isTime()) + cout << print::printElapsed("elapsed", params.startStamp) << endl; + + result_operation result; + result.matrixSimilarity = matrix; + + return result; } result_operation operation::getClusters(vtr3<double> const &input, parameter const ¶ms, map<int, int> const &clusters, FUNC f) { - auto result = getSimilarityMatrix(input, params, f); - //resultCopy.clusterMatrix = result.clusterMatrix; - - for (size_t i = 0; i < input.size(); i++) - { - vector<int> row; - for (size_t j = 0; j < input.size(); j++) - { - row.push_back((int)i + 1); - } - result.matrixCluster.push_back(row); - } - - auto resultCopy = result; - for (size_t i = 0; i < input.size(); i++) //row - { - double tmpD = resultCopy.matrixSimilarity[i][i]; - resultCopy.matrixSimilarity[i][i] = resultCopy.matrixSimilarity[0][i]; - resultCopy.matrixSimilarity[0][i] = tmpD; - - int tmpI = resultCopy.matrixCluster[i][i]; - resultCopy.matrixCluster[i][i] = resultCopy.matrixCluster[0][i]; - resultCopy.matrixCluster[0][i] = tmpI; - } - - help::Sort2dVectorByColumns<double, int>(resultCopy.matrixSimilarity, resultCopy.matrixCluster, params.scoreReversed); - - cout << print::printMapRatios(calcul::getMAPratio(resultCopy.matrixCluster, clusters)); - - result.matrixCluster = resultCopy.matrixCluster; - - return result; + auto result = getSimilarityMatrix(input, params, f); + //resultCopy.clusterMatrix = result.clusterMatrix; + + for (size_t i = 0; i < input.size(); i++) + { + vector<int> row; + for (size_t j = 0; j < input.size(); j++) + { + row.push_back((int)i + 1); + } + result.matrixCluster.push_back(row); + } + + auto resultCopy = result; + for (size_t i = 0; i < input.size(); i++) //row + { + double tmpD = resultCopy.matrixSimilarity[i][i]; + resultCopy.matrixSimilarity[i][i] = resultCopy.matrixSimilarity[0][i]; + resultCopy.matrixSimilarity[0][i] = tmpD; + + int tmpI = resultCopy.matrixCluster[i][i]; + resultCopy.matrixCluster[i][i] = resultCopy.matrixCluster[0][i]; + resultCopy.matrixCluster[0][i] = tmpI; + } + + help::Sort2dVectorByColumns<double, int>(resultCopy.matrixSimilarity, resultCopy.matrixCluster, params.scoreReversed); + + cout << print::printMapRatios(calcul::getMAPratio(resultCopy.matrixCluster, clusters)); + + result.matrixCluster = resultCopy.matrixCluster; + + return result; } result_operation operation::getClusters_omp(vtr3<double> const &input, parameter const ¶ms, map<int, int> const &clusters, FUNC f) { - auto result = getSimilarityMatrix_omp(input, params, f); - - for (size_t i = 0; i < input.size(); i++) - { - vector<int> row; - for (size_t j = 0; j < input.size(); j++) - { - row.push_back((int)i + 1); - } - result.matrixCluster.push_back(row); - } - - auto resultCopy = result; - for (size_t i = 0; i < input.size(); i++) //row - { - double tmpD = resultCopy.matrixSimilarity[i][i]; - resultCopy.matrixSimilarity[i][i] = resultCopy.matrixSimilarity[0][i]; - resultCopy.matrixSimilarity[0][i] = tmpD; - - int tmpI = resultCopy.matrixCluster[i][i]; - resultCopy.matrixCluster[i][i] = resultCopy.matrixCluster[0][i]; - resultCopy.matrixCluster[0][i] = tmpI; - } - - help::Sort2dVectorByColumns<double, int>(resultCopy.matrixSimilarity, resultCopy.matrixCluster, params.scoreReversed); - - cout << print::printMapRatios(calcul::getMAPratio(resultCopy.matrixCluster, clusters)); - - result.matrixCluster = resultCopy.matrixCluster; - - return result; + auto result = getSimilarityMatrix_omp(input, params, f); + + for (size_t i = 0; i < input.size(); i++) + { + vector<int> row; + for (size_t j = 0; j < input.size(); j++) + { + row.push_back((int)i + 1); + } + result.matrixCluster.push_back(row); + } + + auto resultCopy = result; + for (size_t i = 0; i < input.size(); i++) //row + { + double tmpD = resultCopy.matrixSimilarity[i][i]; + resultCopy.matrixSimilarity[i][i] = resultCopy.matrixSimilarity[0][i]; + resultCopy.matrixSimilarity[0][i] = tmpD; + + int tmpI = resultCopy.matrixCluster[i][i]; + resultCopy.matrixCluster[i][i] = resultCopy.matrixCluster[0][i]; + resultCopy.matrixCluster[0][i] = tmpI; + } + + help::Sort2dVectorByColumns<double, int>(resultCopy.matrixSimilarity, resultCopy.matrixCluster, params.scoreReversed); + + cout << print::printMapRatios(calcul::getMAPratio(resultCopy.matrixCluster, clusters)); + + result.matrixCluster = resultCopy.matrixCluster; + + return result; } result_operation operation::getSimilarityMatrix_dimensions(vtr3<double> const &input, parameter const ¶ms, FUNC f) { - const size_t dims = input[0][0].size(); - - vtr3<double> inputSeparated; - if (params.operation == 3 || params.operation == 4) - inputSeparated = help::separateSequence(input, 2); - - vtr2<double> matrix; - - cout << setprecision(params.precision); - for (size_t i = 0; i < dims; i++) - { - vector<double> simRow; - for (size_t j = dims; j < inputSeparated.size(); j++) - { - double tmp; - tmp = f(inputSeparated[i], inputSeparated[j], params); - cout << setw(params.precision + 3) << fixed << tmp; - - simRow.push_back(tmp); - } - cout << endl; - matrix.push_back(simRow); - } - - cout << print::printElapsed("elapsed", params.startStamp); - - result_operation result; - result.matrixSimilarity = matrix; - - return result; + const size_t dims = input[0][0].size(); + + vtr3<double> inputSeparated; + //if (params.operation == 3 || params.operation == 4) + inputSeparated = help::separateSequence(input, 2); + + vtr2<double> matrix; + + cout << setprecision(params.precision); + for (size_t i = 0; i < dims; i++) + { + vector<double> simRow; + for (size_t j = dims; j < inputSeparated.size(); j++) + { + double tmp = f(inputSeparated[i], inputSeparated[j], params); // needs to be calculated both diagonal matrices ! + if(params.operation == 3) + cout << setw(params.precision + 3) << fixed << tmp; + + simRow.push_back(tmp); + } + if (params.operation == 3) + cout << endl; + matrix.push_back(simRow); + } + + if (params.isTime() && params.operation == 3) + cout << print::printElapsed("elapsed", params.startStamp); + + result_operation result; + result.matrixSimilarity = matrix; + + return result; } result_operation operation::getSimilarityMatrix_dimensions_omp(vtr3<double> const &input, parameter const ¶ms, FUNC f) { - const int dims = (int)input[0][0].size(); + const int dims = (int)input[0][0].size(); - vtr3<double> inputSeparated; - if (params.operation == 3 || params.operation == 4) - inputSeparated = help::separateSequence(input, 2); + vtr3<double> inputSeparated; + inputSeparated = help::separateSequence(input, 2); - vtr2<double> matrix(dims); - for (size_t i = 0; i < dims; i++) - { - vector<double> simRow(dims); - matrix[i] = simRow; - } + vtr2<double> matrix(dims); + for (size_t i = 0; i < dims; i++) + { + vector<double> simRow(dims); + matrix[i] = simRow; + } - omp_set_num_threads(params.omp); - #pragma omp parallel for schedule(dynamic, 1)/* num_threads(params.threads)*/ - for (int g = 0; g < dims * dims; g++) - { - int i = g / dims; - int j = g % dims; + omp_set_num_threads(params.omp); + #pragma omp parallel for schedule(dynamic, 1)/* num_threads(params.threads)*/ + for (int g = 0; g < dims * dims; g++) + { + int i = g / dims; + int j = g % dims; - double tmp = f(inputSeparated[i], inputSeparated[j + dims], params); - matrix[i][j] = tmp; - matrix[j][i] = tmp; - } + double tmp = f(inputSeparated[i], inputSeparated[j + dims], params); + matrix[i][j] = tmp; + //matrix[j][i] = tmp; + } - cout << print::printElapsed("elapsed", params.startStamp) << endl; + if (params.isTime()) + cout << print::printElapsed("elapsed", params.startStamp) << endl; - result_operation result; - result.matrixSimilarity = matrix; + result_operation result; + result.matrixSimilarity = matrix; - return result; + return result; } result_operation operation::getBestSimilarityMatrix_dimensions(vtr3<double> const &input, parameter const ¶ms, FUNC f) { - vtr2<double> matrix; - - cout << setprecision(params.precision); - for (size_t i = 0; i < input.size(); i++) - { - vector<double> simRow; - cout << setw(3) << i + 1 << " "; - for (size_t j = 0; j < input.size(); j++) - { - vtr3<double> subInput; - subInput.push_back(input[i]); - subInput.push_back(input[j]); - - if (i >= j) - { - double maxSim = -1; - for (int i = 0; i < matrix.size(); i++) - { - for (int j = 0; j < matrix[i].size(); j++) - { - if (maxSim < matrix[i][j]) - { - maxSim = matrix[i][j]; - } - } - } - - cout << setw(params.precision + 3) << fixed << maxSim; - simRow.push_back(maxSim); - } - else - cout << string(params.precision + 3, ' '); - } - - cout << endl; - matrix.push_back(simRow); - } - - cout << print::printElapsed("elapsed", params.startStamp); - - result_operation result; - result.matrixSimilarity = matrix; - - return result; + result_operation result; + vtr2<double> matrixSim; + + cout << setprecision(params.precision); + for (size_t i = 0; i < input.size(); i++) + { + vector<double> simRow; + cout << setw(3) << i + 1 << " "; + for (size_t j = 0; j < input.size(); j++) + { + vtr3<double> subInput; + subInput.push_back(input[i]); + subInput.push_back(input[j]); + + if (i >= j) + { + matrixSim = getSimilarityMatrix_dimensions(subInput, params, f).matrixSimilarity; + + double maxSim = -1; + for (int i = 0; i < matrixSim.size(); i++) + { + for (int j = 0; j < matrixSim[i].size(); j++) + { + if (maxSim < matrixSim[i][j]) + maxSim = matrixSim[i][j]; + } + } + + cout << setw(params.precision + 3) << fixed << maxSim; + simRow.push_back(maxSim); + } + else + cout << string(params.precision + 3, ' '); + } + + cout << endl; + result.matrixSimilarity.push_back(simRow); + } + + if (params.isTime()) + cout << print::printElapsed("elapsed", params.startStamp); + + /* result_operation result; + result.matrixSimilarity = matrix;*/ + + return result; } result_operation operation::getBestSimilarityMatrix_dimensions_omp(vtr3<double> const &input, parameter const ¶ms, FUNC f) { - vtr2<double> matrix(input.size()); - for (size_t i = 0; i < input.size(); i++) - { - vector<double> simRow(input.size()); - matrix[i] = simRow; - } - - cout << setprecision(params.precision); - for (size_t i = 0; i < input.size(); i++) - { - vector<double> simRow; - for (size_t j = 0; j < input.size(); j++) - { - vtr3<double> subInput; - subInput.push_back(input[i]); - subInput.push_back(input[j]); - - if (i >= j) - { - double maxSim = -1; - for (int i = 0; i < matrix.size(); i++) - for (int j = 0; j< matrix[i].size(); j++) - if (maxSim < matrix[i][j]) - { - maxSim = matrix[i][j]; - } - - cout << setw(params.precision + 3) << fixed << maxSim; - simRow.push_back(maxSim); - } - else - cout << string(params.precision + 3, ' '); - } - cout << endl; - matrix.push_back(simRow); - } - - cout << print::printElapsed("elapsed", params.startStamp); - - result_operation result; - result.matrixSimilarity = matrix; - - return result; + vtr2<double> matrix(input.size()); + for (size_t i = 0; i < input.size(); i++) + { + vector<double> simRow(input.size()); + matrix[i] = simRow; + } + + cout << setprecision(params.precision); + for (size_t i = 0; i < input.size(); i++) + { + vector<double> simRow; + for (size_t j = 0; j < input.size(); j++) + { + vtr3<double> subInput; + subInput.push_back(input[i]); + subInput.push_back(input[j]); + + if (i >= j) + { + matrix = getSimilarityMatrix_dimensions(subInput, params, f).matrixSimilarity; + + double maxSim = -1; + for (int i = 0; i < matrix.size(); i++) + { + for (int j = 0; j< matrix[i].size(); j++) + { + if (maxSim < matrix[i][j]) + maxSim = matrix[i][j]; + } + } + + cout << setw(params.precision + 3) << fixed << maxSim; + simRow.push_back(maxSim); + } + else + cout << string(params.precision + 3, ' '); + } + cout << endl; + matrix.push_back(simRow); + } + + if(params.isTime()) + cout << print::printElapsed("elapsed", params.startStamp) << endl; + + result_operation result; + result.matrixSimilarity = matrix; + + return result; } \ No newline at end of file diff --git a/SequenceComparison/parameter.cpp b/SequenceComparison/parameter.cpp index 6142ed75fc5b4212f61dbc7cd1d460431827df71..2828e793d196e8603072d9639de697708cff7ada 100644 --- a/SequenceComparison/parameter.cpp +++ b/SequenceComparison/parameter.cpp @@ -9,311 +9,354 @@ namespace fs = std::experimental::filesystem; parameter parameter::setParameters(vtrS const &args) { - auto mapSetting = mapParameters(args); - - parameter params; - try { - params = useParameters(mapSetting); - } - catch (exception const &e) - { - cout << "error: Invalid args, " << e.what() << endl; - exit(0); - } - - try { - checkParameters(params, mapSetting); - } - catch (exception const &e) - { - cout << "error: parameters check, " << e.what() << endl; - exit(0); - } - - - return params; + auto mapSetting = mapParameters(args); + + parameter params; + try { + params = useParameters(mapSetting); + } + catch (exception const &e) + { + cout << "error: Invalid args, " << e.what() << endl; + exit(0); + } + + try { + checkParameters(params, mapSetting); + } + catch (exception const &e) + { + cout << "error: parameters check, " << e.what() << endl; + exit(0); + } + + + return params; } map<string, string> parameter::mapParameters(vtrS const &args) { - map<string, string> map; - /*vector<string> args; - - for (size_t i = 0; i < argc; i++) - { - args.push_back(argv[i]); - }*/ - - for (size_t i = 0; i < args.size() - 1; i++) - { - if (args[i][0] == '-' && args[i] != "-in") - map[args[i]] = args[i + 1]; - else if (args[i] == "-in") - map[args[i]] = mapMultiParameter(args, (int)i + 1); - } - - if (args.size() > 1) - { - for (size_t i = 0; i < args.size(); i++) - { - if (args[i] == "-help") - { - printHelp(); - exit(0); - } - else if (args[i] == "-i") - map["-i"] = "true"; - else if (args[i] == "-zn") - map["-zn"] = "true"; - else if (args[i] == "-pin") - map["-pin"] = "true"; - else if (args[i] == "-smooth") - map["-smooth"] = mapOptionalParameter(args, (int)i + 1, "3"); - else if (args[i] == "-rr") - map["-rr"] = "true"; - } - } - - return map; + map<string, string> map; + + for (size_t i = 0; i < args.size() - 1; i++) + { + if (args[i][0] == '-' && args[i] != "-in") + map[args[i]] = args[i + 1]; + else if (args[i] == "-in") + map[args[i]] = mapMultiParameter(args, (int)i + 1); + } + + if (args.size() > 1) + { + for (size_t i = 0; i < args.size(); i++) + { + if (args[i] == "-help") + { + printHelp(); + exit(0); + } + else if (args[i] == "-i") + map["-i"] = "true"; + else if (args[i] == "-zn") + map["-zn"] = "true"; + else if (args[i] == "-pin") + map["-pin"] = "true"; + else if (args[i] == "-smooth") + map["-smooth"] = mapOptionalParameter(args, (int)i + 1, "3"); + else if (args[i] == "-rr") + map["-rr"] = "true"; + else if (args[i] == "-time") + map["-time"] = "true"; + } + } + + return map; } string parameter::mapOptionalParameter(vtrS const &args, int idx, string param_default) { - if (idx >= args.size() || args[idx][0] != '-') - return param_default; - else - return args[idx]; + if (idx >= args.size() || args[idx][0] != '-') + return param_default; + else + return args[idx]; } string parameter::mapMultiParameter(vtrS const &args, int idx) { - string str = ""; - for (size_t j = idx; j < args.size(); j++) - { - if (args[j][0] != '-') - { - str += args[j] + ";"; - } - else - break; - } - - return str; + string str = ""; + for (size_t j = idx; j < args.size(); j++) + { + if (args[j][0] != '-') + { + str += args[j] + ";"; + } + else + break; + } + + return str; } void parameter::checkParameters(parameter const ¶ms, map<string, string> const &mapSetting) { - if (params.method != "dtw" && params.method != "lcss" && params.method != "pdtw") - throw runtime_error("Method not found"); - - if (mapSetting.count("-w") > 0 && params.w < 0) - { - throw runtime_error("parameter w is lower then 0."); - } - - if (mapSetting.count("-e") > 0 && params.epsilon < 0) - { - throw runtime_error("parameter e must be positive"); - } - - if (params.inPath.size() < 1) - throw runtime_error("Input file not found"); - - if (params.operation < 0 ) - { - throw runtime_error("Operation not found"); - } - - if (mapSetting.count("-omp") > 0 && params.omp < 1) - { - throw runtime_error("Invalid number of threads"); - } - - if (mapSetting.count("-p") > 0 && params.precision < 1) - { - throw runtime_error("Invalid precision setting"); - } - - if (mapSetting.count("-s") > 0 && params.scoreType < 1) - { - throw runtime_error("Invalid score type setting"); - } - - if (mapSetting.count("-r") > 0 && params.reduce != "") - { - throw runtime_error("Invalid reduction setting"); - } - - if (mapSetting.count("-type") > 0) - { - if (params.dmDataType != "int" && params.dmDataType != "float" && params.dmDataType != "double" && params.dmDataType != "char" && params.dmDataType != "byte") - { - throw runtime_error("Invalid data type for distance matrix"); - } - } - - if (mapSetting.count("-gdf") > 0 && !fs::is_directory(help::stripFileNameFromPath(params.gdf))) - { - throw runtime_error("Invalid gdf folder."); - } - - if (params.operation == 2 && !fs::exists(params.clusterInfoPath)) - { - throw runtime_error("Invalid cluster info file path."); - } + if (params.method != "dtw" && params.method != "lcss" && params.method != "pdtw") + throw runtime_error("Method not found"); + + if (mapSetting.count("-w") > 0 && params.w < 0) + { + throw runtime_error("parameter w is lower then 0."); + } + + if (mapSetting.count("-e") > 0 && params.epsilon < 0) + { + throw runtime_error("parameter e must be positive"); + } + + if (params.inPath.size() < 1) + throw runtime_error("Input file not found"); + + if (params.operation < 0 ) + { + throw runtime_error("Operation not found"); + } + + if (mapSetting.count("-omp") > 0 && params.omp < 1) + { + throw runtime_error("Invalid number of threads"); + } + + if (mapSetting.count("-p") > 0 && params.precision < 1) + { + throw runtime_error("Invalid precision setting"); + } + + if (mapSetting.count("-s") > 0 && params.scoreType < 1) + { + throw runtime_error("Invalid score type setting"); + } + + if (mapSetting.count("-r") > 0 && params.reduce < 2) + { + throw runtime_error("Invalid reduction setting"); + } + + if (mapSetting.count("-type") > 0) + { + if (params.dmDataType != "int" && params.dmDataType != "float" && params.dmDataType != "double" && params.dmDataType != "char" && params.dmDataType != "byte") + { + throw runtime_error("Invalid data type for distance matrix"); + } + } + + if (mapSetting.count("-gdf") > 0 && !fs::is_directory(help::stripFileNameFromPath(params.gdf))) + { + throw runtime_error("Invalid gdf folder."); + } + + if (params.operation == 2 && !fs::exists(params.clusterInfoPath)) + { + throw runtime_error("Invalid cluster info file path."); + } } parameter parameter::useParameters(map<string, string> &mapSetting) { - parameter params; - if (mapSetting.size() < 1) { - printHelp(); - } - else if (mapSetting.size() > 0) - { - params.inPath = help::split(mapSetting.at("-in"), ";"); - params.outputPath = mapSetting.count("-out") > 0 ? mapSetting.at("-out") : params.outputPath; - params.gdf = mapSetting.count("-gdf") > 0 ? mapSetting.at("-gdf") : params.gdf; - params.html = mapSetting.count("-html") > 0 ? mapSetting.at("-html") : params.html; - params.method = mapSetting.count("-m") > 0 ? mapSetting.at("-m") : params.method; - params.w = mapSetting.count("-w") > 0 ? static_cast<float>(stod(mapSetting.at("-w")) / 100.0) : params.w; - params.epsilon = mapSetting.count("-e") > 0 ? static_cast<float>(stod(mapSetting.at("-e"))) : params.epsilon; - params.delta = mapSetting.count("-d") > 0 ? stof(mapSetting.at("-d")) : params.delta; - params.operation = mapSetting.count("-op") > 0 ? static_cast<int>(stoi(mapSetting.at("-op"))) : params.operation; - params.interpolate = mapSetting.count("-i") > 0 ? true : params.interpolate; - params.scoreType = mapSetting.count("-s") > 0 ? std::abs(static_cast<int>(stoi(mapSetting.at("-s")))) : params.scoreType; - params.znormalize = mapSetting.count("-zn") > 0 ? true : false; - params.normalize = mapSetting.count("-n") > 0 ? stoi(mapSetting.at("-n")) : params.normalize; - params.omp = mapSetting.count("-omp") > 0 ? stoi(mapSetting.at("-omp")) : params.omp; - params.precision = mapSetting.count("-p") > 0 ? stoi(mapSetting.at("-p")) : params.precision; - params.reduce = mapSetting.count("-r") > 0 ? mapSetting.at("-r") : params.reduce; - params.paa = mapSetting.count("-paa") > 0 ? stoi(mapSetting.at("-paa")) : params.paa; - params.dmDataType = mapSetting.count("-type") > 0 ? mapSetting.at("-type") : params.dmDataType; - params.pin = mapSetting.count("-pin") > 0 ? true : params.pin; - params.clusterInfoPath = mapSetting.count("-c") > 0 ? mapSetting.at("-c") : ""; - params.smooth = mapSetting.count("-smooth") > 0 ? stoi(mapSetting.at("-smooth")): 0; - params.scoreReversed = mapSetting.count("-rr") > 0 ? true : false; - params.relaxation = mapSetting.count("-relax") > 0 ? stoi(mapSetting.at("-relax")) : 0; - } - - return params; + parameter params; + if (mapSetting.size() < 1) { + printHelp(); + } + else if (mapSetting.size() > 0) + { + params.inPath = help::split(mapSetting.at("-in"), ";"); + params.outputPath = mapSetting.count("-out") > 0 ? mapSetting.at("-out") : params.outputPath; + params.gdf = mapSetting.count("-gdf") > 0 ? mapSetting.at("-gdf") : params.gdf; + params.html = mapSetting.count("-html") > 0 ? mapSetting.at("-html") : params.html; + params.method = mapSetting.count("-m") > 0 ? mapSetting.at("-m") : params.method; + params.w = mapSetting.count("-w") > 0 ? static_cast<float>(stod(mapSetting.at("-w")) / 100.0) : params.w; + params.epsilon = mapSetting.count("-e") > 0 ? static_cast<float>(stod(mapSetting.at("-e"))) : params.epsilon; + params.delta = mapSetting.count("-d") > 0 ? stof(mapSetting.at("-d")) : params.delta; + params.operation = mapSetting.count("-op") > 0 ? static_cast<int>(stoi(mapSetting.at("-op"))) : params.operation; + params.interpolate = mapSetting.count("-i") > 0 ? true : params.interpolate; + params.scoreType = mapSetting.count("-s") > 0 ? std::abs(static_cast<int>(stoi(mapSetting.at("-s")))) : params.scoreType; + params.znormalize = mapSetting.count("-zn") > 0 ? true : false; + params.normalize = mapSetting.count("-n") > 0 ? stoi(mapSetting.at("-n")) : params.normalize; + params.omp = mapSetting.count("-omp") > 0 ? stoi(mapSetting.at("-omp")) : params.omp; + params.precision = mapSetting.count("-p") > 0 ? stoi(mapSetting.at("-p")) : params.precision; + params.reduce = mapSetting.count("-r") > 0 ? stoi(mapSetting.at("-r")) : params.reduce; + params.paa = mapSetting.count("-paa") > 0 ? stoi(mapSetting.at("-paa")) : params.paa; + params.dmDataType = mapSetting.count("-type") > 0 ? mapSetting.at("-type") : params.dmDataType; + params.pin = mapSetting.count("-pin") > 0 ? true : params.pin; + params.timeMeasure = mapSetting.count("-time") > 0 ? true : params.timeMeasure; + params.clusterInfoPath = mapSetting.count("-c") > 0 ? mapSetting.at("-c") : ""; + params.smooth = mapSetting.count("-smooth") > 0 ? stoi(mapSetting.at("-smooth")): 0; + params.scoreReversed = mapSetting.count("-rr") > 0 ? true : false; + params.relaxation = mapSetting.count("-relax") > 0 ? stoi(mapSetting.at("-relax")) : 0; + } + + return params; } bool parameter::isZNormalization() const { - return znormalize; + return znormalize; } bool parameter::isOmp() const { - if (omp > 0) - return true; - else - return false; + if (omp > 0) + return true; + else + return false; } -bool parameter::isOutput() const +bool parameter::isWriteOutput() const { - if (outputPath != "") - return true; - else - return false; + if (outputPath != "") + return true; + else + return false; +} + +bool parameter::isPrintOutput() const +{ + if (outputPath != "") + return true; + else + return false; } bool parameter::isPaa() const { - if (paa > 1) - return true; - else - return false; + if (paa > 1) + return true; + else + return false; } bool parameter::isReduce() const { - if (reduce != "") - return true; - else - return false; + if (reduce > 1) + return true; + else + return false; } bool parameter::isGdf() const { - if (gdf != "") - return true; - else - return false; + if (gdf != "") + return true; + else + return false; } bool parameter::isNormalizeZeroOne() const { - if (normalize > 0) - return true; - else - return false; + if (normalize > 0) + return true; + else + return false; } bool parameter::isSmooth() const { - return smooth; + return smooth; } bool parameter::isRatioReversed() const { - return scoreReversed; + return scoreReversed; } bool parameter::isClusterInfo() const { - if (clusterInfoPath != "") - return true; - else - return false; + if (clusterInfoPath != "") + return true; + else + return false; +} + +bool parameter::isHtml() const +{ + if (html != "") + return true; + else + return false; } bool parameter::isPin() const { - return pin; + return pin; } -bool parameter::isHtml() const +bool parameter::isTime() const +{ + return timeMeasure; +} + +void parameter::parameterPriority(vtr<string> &args, vtr<string> const &argsPriority) { - if (html != "") - return true; - else - return false; + for (size_t i = 2; i < argsPriority.size(); i++) + { + if (argsPriority[i][0] != '-') + continue; + + bool insert = true; + for (size_t j = 0; j < args.size(); j++) + { + if (args[j][0] != '-') + continue; + + if (argsPriority[i] == args[j]) { + args[j + 1] = argsPriority[i + 1]; + insert = false; + } + } + + if (insert) + { + args.push_back(argsPriority[i]); + + int c = 1; + while (i + c < argsPriority.size() && argsPriority[i + c][0] != '-') + { + args.push_back(argsPriority[i + 1]); + c++; + } + } + } } void parameter::printHelp() { - cout << "I/O options"; - cout << "-in [input folder path, input file path]" << endl; - cout << "-out [output file path]" << endl; - cout << "-gdf [gdf output file path]" << endl; - cout << "-html [html output file path]" << endl; - cout << "-m [dtw,lcss] - Choose method." << endl; - cout << "-w [size] - Set size of warping window." << endl; - cout << "-e [size] - Set parameter epsilon for method lcss." << endl; - cout << "-d [size] - Set parameter delta for method lcss." << endl; - cout << "RESULT options" << endl; - cout << "-s [score calculation] - different score calculation (default: -s 1)" << endl; - cout << "-s 1 = sqrt(rawScore) / |path|" << endl; - cout << "-s 2 = (|A|+|B|) / (|newA|+|newB|)" << endl; - cout << "-s 3 = sqrt(ratioRaw)" << endl; - cout << "-s 4 = sqrt(dtw(A,B)) / sqrt(dtvMax(A,B))" << endl; - cout << "-s 5 = sqrt(dtw(A,B)) / sqrt(dtvMax(A,B)) * (min(|A|,|B|) / max(|A|,|B|)" << endl; - cout << "-p [precision] - Set floting point precision <0,10> for print." << endl; - cout << "-op [op] - choose operation you want" << endl; - cout << "-op 1 - Similarity matrix between N sequence." << endl; - cout << "-op 2 - Clustering of N sequences." << endl; - cout << "-op 3 - Similarity matrix of dimensions (pair dtw)." << endl; - cout << "-op 4 - Best similarity matrix of dimensions." << endl; - cout << "DATA PREPROCESING options" << endl; - cout << "-n [max value] - Sequence are normalized to <0,1> interval before analysation." << endl; - cout << "-i - Sequence are interpolated before analysation." << endl; - cout << "-r [skips] - skips = 2 will skip every 2. element in sequence (1/2). skip = 23 will skip every 2. element in first cycle and every 3. in second cycle (2/6)." << endl; - cout << "-paa [size - number of neighbour elements to average] - Piecewise Aggregate Approximation - Scales down size of input sequence by averaging neighbour sequence elements." << endl; - cout << "-smooth [size] - sequence are smoothed by moveing window." << endl; - cout << "OPTIMIZATION options" << endl; - cout << "-omp [threads] - Use parallel version of operaions." << endl; - cout << "-type [type] - Data type which is used for distance matrix (int, float, double, byte). CAN CRASH if type is too small."; + cout << "I/O options"; + cout << "-in [input folder path, input file path]" << endl; + cout << "-out [output file path]" << endl; + cout << "-gdf [gdf output file path]" << endl; + cout << "-html [html output file path]" << endl; + cout << "-m [dtw,lcss] - Choose method." << endl; + cout << "-w [size] - Set size of warping window." << endl; + cout << "-e [size] - Set parameter epsilon for method lcss." << endl; + cout << "-d [size] - Set parameter delta for method lcss." << endl; + cout << "RESULT options" << endl; + cout << "-s [score calculation] - different score calculation (default: -s 1)" << endl; + cout << "-s 1 = sqrt(rawScore) / |path|" << endl; + cout << "-s 2 = (|A|+|B|) / (|newA|+|newB|)" << endl; + cout << "-s 3 = sqrt(ratioRaw)" << endl; + cout << "-s 4 = sqrt(dtw(A,B)) / sqrt(dtvMax(A,B))" << endl; + cout << "-s 5 = sqrt(dtw(A,B)) / sqrt(dtvMax(A,B)) * (min(|A|,|B|) / max(|A|,|B|)" << endl; + cout << "-p [precision] - Set floting point precision <0,10> for print." << endl; + cout << "-op [op] - choose operation you want" << endl; + cout << "-op 1 - Similarity matrix between N sequence." << endl; + cout << "-op 2 - Clustering of N sequences." << endl; + cout << "-op 3 - Similarity matrix of dimensions (pair dtw)." << endl; + cout << "-op 4 - Best similarity matrix of dimensions." << endl; + cout << "DATA PREPROCESING options" << endl; + cout << "-n [max value] - Sequence are normalized to <0,1> interval before analysation." << endl; + cout << "-i - Sequence are interpolated before analysation." << endl; + cout << "-r [skips] - skips = 2 will skip every 2. element in sequence (1/2). skip = 23 will skip every 2. element in first cycle and every 3. in second cycle (2/6)." << endl; + cout << "-paa [size - number of neighbour elements to average] - Piecewise Aggregate Approximation - Scales down size of input sequence by averaging neighbour sequence elements." << endl; + cout << "-smooth [size] - sequence are smoothed by moveing window." << endl; + cout << "OPTIMIZATION options" << endl; + cout << "-omp [threads] - Use parallel version of operaions." << endl; + cout << "-type [type] - Data type which is used for distance matrix (int, float, double, byte). CAN CRASH if type is too small."; } \ No newline at end of file diff --git a/SequenceComparison/parameter.h b/SequenceComparison/parameter.h index 198820a727b70876cc2f934199fbd573dcf42fe4..41434261dd5beabd060d2724eb98768cbe148991 100644 --- a/SequenceComparison/parameter.h +++ b/SequenceComparison/parameter.h @@ -9,75 +9,79 @@ class parameter { public: - std::string method; //method - int operation; //number of operation + std::string method; //method + int operation; //number of operation - double w; //warping windows (dtw) + double w; //warping windows (dtw) double delta; //time dilation - lcss double epsilon; //size difference - lcss - int relaxation; //size of relaxation (relaxation of endpoints) - - // I/O - vtr<std::string> inPath; //input paths - std::string outputPath; //outpath path for log - std::string gdf; //path of gdf file generation - std::string clusterInfoPath; //path of cluster info file - std::string html; + int relaxation; //size of relaxation (relaxation of endpoints) + + // I/O + vtr<std::string> inPath; //input paths + std::string outputPath; //outpath path for log + std::string gdf; //path of gdf file generation + std::string clusterInfoPath; //path of cluster info file + std::string html; - //result options - int scoreType; //returned value form methods is raw score from end of the distance matrix - int precision; //number print precision - bool scoreReversed; //falg if score shoul be reversed from 0,1 to 1,0 - std::string dmDataType; //data type of nodes in distance matrix + //result options + int scoreType; //returned value form methods is raw score from end of the distance matrix + int precision; //number print precision + bool scoreReversed; //falg if score shoul be reversed from 0,1 to 1,0 + std::string dmDataType; //data type of nodes in distance matrix - //data preprocessing options - std::string reduce; //reduction of input data by omiting elements from sequence -r [2-every second, 3-every hird elm removed] - bool interpolate; //parameter -i will interpolate input s to same length - bool znormalize; //parameter -n will normalize input data - int normalize; //parameter -zn will normalize data into interval <0,1> - int smooth; //smooth sequence by moving window averages - int paa; //piecewise aggregate approximation - -paa [number of elements to be averaged] + //data preprocessing options + int reduce; //reduction of input data by omiting elements from sequence -r [2 - takes every second, 3 - every third elm removed] + bool interpolate; //parameter -i will interpolate input s to same length + bool znormalize; //parameter -n will normalize input data + int normalize; //parameter -zn will normalize data into interval <0,1> + int smooth; //smooth sequence by moving window averages + int paa; //piecewise aggregate approximation - -paa [number of elements to be averaged] - bool pin; //prints input before and after prerpocessing step (for check purposes) + bool pin; //prints input before and after prerpocessing step (for check purposes) - //paralelisation options - int omp; //set number of threads - - //time stamp when choosen method started //not used currently - std::chrono::steady_clock::time_point startStamp; - - //default init - parameter() : - method("dtw"), operation(0), scoreType(1), omp(0), dmDataType("double"), scoreReversed(false), - w(1), delta(10), epsilon(0), relaxation(0), - gdf(""), outputPath(""), clusterInfoPath(""), html(""), - normalize(-1), znormalize(false), interpolate(false), precision(3), reduce(""), paa(1), smooth(0), - pin(false) - {}; + //paralelisation options + int omp; //set number of threads + + //time stamp when choosen method started //not used currently + std::chrono::steady_clock::time_point startStamp; + bool timeMeasure; + + //default init + parameter() : + method("dtw"), operation(0), scoreType(1), omp(0), dmDataType("double"), scoreReversed(false), + w(1), delta(10), epsilon(0), relaxation(0), + gdf(""), outputPath(""), clusterInfoPath(""), html(""), + normalize(-1), znormalize(false), interpolate(false), precision(3), reduce(0), paa(1), smooth(0), + pin(false), timeMeasure(false) + {}; - ~parameter() {}; + ~parameter() {}; - static parameter setParameters(vtrS const &args); - static std::map<std::string, std::string> mapParameters(vtrS const &args); - static std::string mapOptionalParameter(vtrS const & args, int idx, std::string param_default); - static parameter useParameters(std::map<std::string, std::string> &mapSetting); - static std::string mapMultiParameter(vtrS const &args, int idx); - static void checkParameters(parameter const ¶ms, std::map<std::string, std::string> const &mapSetting); - static void printHelp(); + static parameter setParameters(vtrS const &args); + static std::map<std::string, std::string> mapParameters(vtrS const &args); + static std::string mapOptionalParameter(vtrS const & args, int idx, std::string param_default); + static parameter useParameters(std::map<std::string, std::string> &mapSetting); + static std::string mapMultiParameter(vtrS const &args, int idx); + static void checkParameters(parameter const ¶ms, std::map<std::string, std::string> const &mapSetting); + static void parameterPriority(vtr<std::string> &args, vtr<std::string> const &argsPriority); + static void printHelp(); - bool isClusterInfo() const; - bool isGdf() const; - bool isOutput() const; - bool isHtml() const; + bool isClusterInfo() const; + bool isGdf() const; + bool isWriteOutput() const; + bool isPrintOutput() const; + bool isHtml() const; - bool isZNormalization() const; - bool isReduce() const; - bool isPaa() const; - bool isNormalizeZeroOne() const; - bool isSmooth()const; + bool isZNormalization() const; + bool isReduce() const; + bool isPaa() const; + bool isNormalizeZeroOne() const; + bool isSmooth()const; - bool isOmp() const; - bool isRatioReversed() const; + bool isOmp() const; + bool isRatioReversed() const; - bool isPin() const; + bool isPin() const; + bool isTime() const; }; \ No newline at end of file