diff --git a/.gitignore b/.gitignore index 89d9b7e8a840196ca9fe3020cea7a59b46c00f4e..9bf0d025a5e94a29c6993680c8629c09a3837da6 100644 --- a/.gitignore +++ b/.gitignore @@ -270,4 +270,4 @@ __pycache__/ /cimg /SequenceComparison/notused /SClinux - +Doxyfile \ No newline at end of file diff --git a/SCwin/SCwin.args.json b/SCwin/SCwin.args.json index b780f9672853adba78649d7da92f32762f604075..04ea95cf36f53fa68680caaf834f37b4ba611079 100644 --- a/SCwin/SCwin.args.json +++ b/SCwin/SCwin.args.json @@ -73,12 +73,12 @@ "Command": "-op 4" }, { - "Id": "fe43848e-0a9a-46c7-aefe-6e02c688e69d", - "Command": "-m lcss" + "Id": "add63608-14a7-4def-9cb1-7278cc343b0c", + "Command": "-op 5" }, { - "Id": "add63608-14a7-4def-9cb1-7278cc343b0c", - "Command": "-op 6" + "Id": "fe43848e-0a9a-46c7-aefe-6e02c688e69d", + "Command": "-m lcss" }, { "Id": "1ff9530d-1d8f-4348-8eb7-96ce5091f7e8", @@ -86,7 +86,7 @@ }, { "Id": "1b30753d-e053-479c-b0cb-ad83a7009c9f", - "Command": "-paa 20" + "Command": "-paa 65" }, { "Id": "7ca22c6e-0b52-4357-9fc7-995b2c0c0164", @@ -94,11 +94,11 @@ }, { "Id": "ffa5ab9f-8524-4479-a79b-dade7c3d1508", - "Command": "-dist 3" + "Command": "-mem" }, { "Id": "cd60865d-7f0b-4ee2-b7e7-b2debb07b834", - "Command": "-tt 0 -ta 0 -te 0" + "Command": "-tt 100 -ta 0 -te 100 -tl 5" }, { "Id": "827df8ff-ff78-4e42-a7ff-23929a9e03a2", @@ -106,7 +106,7 @@ }, { "Id": "83b8ddec-117c-4556-830d-d701ab5d082a", - "Command": "-i" + "Command": "-wd 50 -d 1 1" }, { "Id": "fcaac480-da78-4fb0-89f7-8edc217a986d", @@ -114,7 +114,7 @@ }, { "Id": "35f58106-c0db-41a2-a509-a11d3975cef5", - "Command": "-c c:\\code\\data\\covers80\\cCovers80.txt" + "Command": "-local" }, { "Id": "f08d0acd-19e1-40ec-9180-31b29bd11ad1", @@ -134,7 +134,7 @@ }, { "Id": "eebf3b27-0fdc-4a84-81eb-fec4308c6032", - "Command": "-pr 1" + "Command": "-clu 5" }, { "Id": "c9c3410e-67ed-4f97-9290-8e3533731181", @@ -142,7 +142,7 @@ }, { "Id": "461b3809-ec0b-4a7d-a1c5-10690de7c78d", - "Command": "-omp 4" + "Command": "-omp 2" }, { "Id": "4edd88c1-3930-4569-891e-889c4870eb29", diff --git a/SCwin/main.cpp b/SCwin/main.cpp index 717c72738dad107f146db343b5247236f9aaa429..f4972c6a1fd3e1dd83a8ad5bd223da52cb05404f 100644 --- a/SCwin/main.cpp +++ b/SCwin/main.cpp @@ -10,7 +10,20 @@ int main(int argc, char **argv) for (int i = 1; i < argc; i++) args.push_back(argv[i]); - mains::main_entry(args); + //args.push_back("-in"); + //args.push_back("c:\\code\\data\\sc\\chord"); + //args.push_back("-mem"); + ////args.push_back("-draw"); + ////args.push_back("c:\\code\\data\\sc\\test\\pic1"); + ////args.push_back("-local"); + ////args.push_back("-tl"); + ////args.push_back("50"); + ////args.push_back("-clu"); + ////args.push_back("5"); + //args.push_back("-op"); + //args.push_back("0"); + + mains::master(args); return 0; } \ No newline at end of file diff --git a/SequenceComparison/CMakeLists.txt b/SequenceComparison/CMakeLists.txt index 4f4778e308d1e22dd16ce5e659a2eead929a6ab6..62bc5c4136b66934dad3c24a93f21ba1c54311aa 100644 --- a/SequenceComparison/CMakeLists.txt +++ b/SequenceComparison/CMakeLists.txt @@ -7,7 +7,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -set(CMAKE_CXX_FLAGS "-std=c++14 -Wall -pedantic -lstdc++fs -lX11") +set(CMAKE_CXX_FLAGS "-std=c++14 -Wall -pedantic -lstdc++fs -lX11 -m64") set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_DEBUG "-g") @@ -29,10 +29,44 @@ else() message(STATUS "STATUS: X11 not found") endif() +#if libX11.so lib is in different location on your system -> add path here. +find_library(LIB_X11 NAME libX11.so PATHS + /usr/lib + /usr/lib64 + /usr/local/lib + /usr/local/lib64 +) + +if(NOT LIB_X11) + message(STATUS "ERROR: libX11.so not found") +else() + message("OK: libX11.so found: "${LIB_X11}) +endif() + +#if libstdc++fs.a lib is in different location on your system -> add path here. +find_library(LIB_FS NAME libstdc++fs.a PATHS + /usr/lib/gcc/x86_64-linux-gnu/7.2.0 + /usr/lib/gcc/x86_64-linux-gnu/7.1.0 + /usr/lib/gcc/x86_64-linux-gnu/6.4.0 + /usr/lib/gcc/x86_64-linux-gnu/6.3.0 + /usr/lib/gcc/x86_64-linux-gnu/5.4.0 + /usr/lib + /usr/lib64 + /usr/local/lib + /usr/local/lib64 +) + +if(NOT LIB_FS) + message(STATUS "ERROR: libstdc++fs not found") +else() + message("OK: libstdc++fs found: "${LIB_FS}) +endif() SET(DEPEND_EXT - /usr/lib/x86_64-linux-gnu/libX11.so - /usr/lib/gcc/x86_64-linux-gnu/7.2.0/libstdc++fs.a + ${LIB_X11} + ${LIB_FS} +# /usr/lib/x86_64-linux-gnu/libX11.so +# /usr/lib/gcc/x86_64-linux-gnu/7.2.0/libstdc++fs.a ) add_library(msalib @@ -44,7 +78,9 @@ add_library(msalib operation.cpp dtw.cpp lcss.cpp + pdtw.cpp print.cpp + preprocess.cpp ) add_executable( diff --git a/SequenceComparison/SequenceComparison.vcxitems b/SequenceComparison/SequenceComparison.vcxitems index 7548d0eefa6f217b3e5bd33b1ed08aa138b91927..fd17d99a2596f4e1de4f54efa47dcf39066a0259 100644 --- a/SequenceComparison/SequenceComparison.vcxitems +++ b/SequenceComparison/SequenceComparison.vcxitems @@ -23,6 +23,7 @@ <ClInclude Include="$(MSBuildThisFileDirectory)dtw.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)entrypoint.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)mains.h" /> + <ClInclude Include="$(MSBuildThisFileDirectory)preprocess.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)stdafx.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)structs.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)help.h" /> @@ -49,6 +50,7 @@ <ClCompile Include="$(MSBuildThisFileDirectory)operation.cpp" /> <ClCompile Include="$(MSBuildThisFileDirectory)parameter.cpp" /> <ClCompile Include="$(MSBuildThisFileDirectory)pdtw.cpp" /> + <ClCompile Include="$(MSBuildThisFileDirectory)preprocess.cpp" /> <ClCompile Include="$(MSBuildThisFileDirectory)print.cpp" /> <ClCompile Include="$(MSBuildThisFileDirectory)stdafx.cpp"> <PrecompiledHeader>Create</PrecompiledHeader> diff --git a/SequenceComparison/calcul.cpp b/SequenceComparison/calcul.cpp index 147e6eb1bebfb61f2eaa4c3a7e646e8764b98f73..504f53cf17fc85d8319b3e0dd77fb3d8d5afa936 100644 --- a/SequenceComparison/calcul.cpp +++ b/SequenceComparison/calcul.cpp @@ -82,7 +82,7 @@ double calcul::distance_dtw_simd(vtr<double> const &u, vtr<double> const &v, siz return sum; } -double calcul::distance_dtw_csi_chroma(vtr<double> const &u, vtr<double> const &v, double treshold = 0.07) +double calcul::distance_dtw_csiChroma(vtr<double> const &u, vtr<double> const &v, double treshold = 0.07) { vtr<int> u_filter(u.size()); vtr<int> v_filter(v.size()); @@ -150,7 +150,7 @@ double calcul::distance_dtw_csi_chroma(vtr<double> const &u, vtr<double> const & return result; } -double calcul::distance_dtw_csi_chord(vtr<double> const &u, vtr<double> const &v, vtr<int> const &uKey, vtr<int> const &vKey) +double calcul::distance_dtw_csiChord(vtr<double> const &u, vtr<double> const &v, vtr<int> const &uKey, vtr<int> const &vKey) { int minU = constant::MAX_int; int minV = constant::MAX_int; @@ -183,35 +183,28 @@ double calcul::distance_dtw_csi_chord(vtr<double> const &u, vtr<double> const &v if (idxU > 11) idxU -= 12; if (idxV > 11) idxV -= 12; - auto dist1 = calcul::distance_circleOfFifth(idxU, idxV); //distance between chord roots + auto dist1 = calcul::distance_circleFifth(idxU, idxV); //distance between chord roots + auto dist2 = calcul::distance_circleFifth(uKey[uKey.size() - 1], vKey[vKey.size() - 1]); //dist key roots - auto dist2 = calcul::distance_circleOfFifth(uKey[uKey.size() - 1], vKey[vKey.size() - 1]); //dist key roots - - vtr<bool> pitchU(48); //pitch space of 4 levels (level e is irrelevant) - vtr<bool> pitchV(48); - - pitchU[idxU] = 1; //level a root - pitchU[idxU + 12] = 1; //level b root - pitchU[idxU + 19 % 12] = 1; //level b 2. - pitchU[idxU + 24] = 1; //level c root - pitchU[idxU + 31 % 24] = 1; //level c 3. - - if(idxU > 11) //level c 2. - pitchU[idxU + 27 % 24] = 1; - else - pitchU[idxU + 28 % 24] = 1; - - pitchV[idxV] = 1; //level a root - pitchV[idxV + 12] = 1; //level b root - pitchV[idxV + 19 % 12] = 1; //level b 2. - pitchV[idxV + 24] = 1; //level c root - pitchV[idxV + 31 % 24] = 1; //level c 3. - - if (idxV > 11) //level c 2. - pitchV[idxV + 27 % 24] = 1; - else - pitchV[idxV + 28 % 24] = 1; + auto getPitch = [](int idx) { + vtr<bool> pitch(48); //pitch space of 4 levels (level e is irrelevant) + pitch[idx] = 1; //level a root + pitch[idx + 12] = 1; //level b root + pitch[idx + 19 % 12] = 1; //level b 2. + pitch[idx + 24] = 1; //level c root + pitch[idx + 31 % 24] = 1; //level c 3. + + if (idx > 11) //level c 2. + pitch[idx + 27 % 24] = 1; + else + pitch[idx + 28 % 24] = 1; + + return pitch; + }; + auto pitchU = getPitch(idxU); //pitch space of 4 levels (level e is irrelevant) + auto pitchV = getPitch(idxV); + //minU = int_max; //reinit variables //minV = int_max; //idxU = 0; @@ -310,7 +303,7 @@ double calcul::score_dtw_s5(double ratioRaw, double ratioRawMax, double coeficie return (sqrt(ratioRaw) / sqrt(ratioRawMax)) * coeficient; } -double calcul::score_dtw_max(vtr2<double> const &A, vtr2<double> const &B, coords start, coords end) +double calcul::score_dtw_max(vtr2<double> const &A, vtr2<double> const &B, coord start, coord end) { double min = constant::MAX_double; double max = constant::MIN_double; @@ -609,7 +602,7 @@ double calcul::lb_keogh(vtr2<double> const &A, vtr2<double> const &B, parameter return result; } -double calcul::distance_circleOfFifth(int idx1, int idx2) +double calcul::distance_circleFifth(int idx1, int idx2) { double distance = std::abs(idx1 - idx2); @@ -619,6 +612,23 @@ double calcul::distance_circleOfFifth(int idx1, int idx2) return distance; } +int calcul::dtw_wpassStart(size_t i, int w, double coeficient) +{ + return std::max(1, (int)(ceil((i - 1) * coeficient + 0.0000000001) - w)); +} + +int calcul::dtw_wpassEnd(size_t i, int w, double coeficient, int lenB) +{ + return std::min(lenB + 1, (int)(ceil(i * coeficient) + 1) + w); +} + +bool calcul::dtw_isFlexiblePass(int i, int j, int iPast, int jPast, int w, int d) +{ + //int kd = k - d; +// return std::abs((i * k - i * kd) - (j * k - j * kd)) < w; + return std::abs((i - iPast) - (j - jPast)) < w; +} + double calcul::vtr_mean(vtr<double> const &v) { double sum = 0; @@ -637,4 +647,74 @@ double calcul::vtr_std(vtr<double> const &v) sum += pow(i - mean, 2); return sqrt(sum / v.size()); +} + +template <typename T> +T calcul::vtr_max(vtr<T> const &input) +{ + double max = constant::MIN_double; + + for (auto &i : input) + if (i > max) + max = i; + + return max; +} +template double calcul::vtr_max<double>(vtr<double> const &input); + +template <typename T> +T calcul::vtr_max(vtr2<T> const &input) +{ + double max = constant::MIN_double; + + for (auto &&i : input) + for (auto &&j : i) + if (j > max) + max = j; + + return max; +} +template double calcul::vtr_max<double>(vtr2<double> const &input); + +template <typename T> +T calcul::vtr_max(vtr3<T> const &input) +{ + double max = constant::MIN_double; + + for (auto &i : input) { + auto tmp = vtr_findMax(i); + + if (tmp > max) + max = tmp; + } + + return max; +} + +template <typename T> +T calcul::vtr_min(vtr2<T> const &input) +{ + double min = constant::MAX_double; + + for (auto &&i : input) + for (auto &&j : i) + if (j < min) + min = j; + + return min; +} +template double calcul::vtr_min<double>(vtr2<double> const &input); + +vtr<double> calcul::vtr_mean(vtr2<double> const &serie) +{ + vtr<double> average(serie.size()); + for (size_t i = 0; i < serie.size(); i++) { + double avg = 0; + for (size_t j = 0; j < serie[0].size(); j++) { + avg += serie[i][j]; + } + average[i] = avg / serie[0].size(); + } + + return average; } \ No newline at end of file diff --git a/SequenceComparison/calcul.h b/SequenceComparison/calcul.h index 1cfb63697f64797cb4928910a25bea7a8fa66c33..a98541ee1cb1bc8c8be11b093132c696371b8f1c 100644 --- a/SequenceComparison/calcul.h +++ b/SequenceComparison/calcul.h @@ -12,19 +12,18 @@ public: static double distance_dtw_manhattan(vtr<double> const &u, vtr<double> const &v); static double distance_dtw_many(vtr2<double> const &points); static double distance_dtw_simd(vtr<double> const &u, vtr<double> const &v, size_t simds, size_t rest); - static double distance_dtw_csi_chroma(vtr<double> const &u, vtr<double> const &v, double treshold/* = 0.07*/); - static double distance_dtw_csi_chord(vtr<double> const &u, vtr<double> const &v, vtr<int> const &uKey, vtr<int> const &vKey); - static double distance_circleOfFifth(int idx1, int idx2); - + static double distance_dtw_csiChroma(vtr<double> const &u, vtr<double> const &v, double treshold/* = 0.07*/); + static double distance_dtw_csiChord(vtr<double> const &u, vtr<double> const &v, vtr<int> const &uKey, vtr<int> const &vKey); + static double distance_circleFifth(int idx1, int idx2); static double distance_lcss(vtr<double> const &u, vtr<double> const &v, int idx); - //scores + //score static double score_dtw_s1(double ratioRaw); static double score_dtw_s2(double ratioRaw, size_t pathLength); static double score_dtw_s3(size_t lenA, size_t lenB, size_t lenPath); static double score_dtw_s4(double ratioRaw, double ratioRawMax); static double score_dtw_s5(double ratioRaw, double ratioRawMax, double coeficient); - static double score_dtw_max(vtr2<double> const &A, vtr2<double> const &B, coords start, coords end); + static double score_dtw_max(vtr2<double> const &A, vtr2<double> const &B, coord start, coord end); static double score_lcss_s1(double ratioRaw, size_t pathLength); static double score_lcss_s2(double ratioRaw, size_t maxABLen); @@ -50,9 +49,18 @@ public: //lower bound static double lb_keogh(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms); + static int dtw_wpassStart(size_t i, int w, double coeficient); + static int dtw_wpassEnd(size_t i, int w, double coeficient, int lenB); + static bool dtw_isFlexiblePass(int i, int j, int iPast, int jPast, int w, int d); + + template <typename T> static T vtr_max(vtr<T> const &input); + template <typename T> static T vtr_max(vtr2<T> const &input); + template <typename T> static T vtr_max(vtr3<T> const &input); + template <typename T> static T vtr_min(vtr2<T> const &input); + static double vtr_mean(vtr<double> const &v); + static vtr<double> vtr_mean(vtr2<double> const &v); static double vtr_std(vtr<double> const &v); - }; #endif //CALCUL_H \ No newline at end of file diff --git a/SequenceComparison/draw.h b/SequenceComparison/draw.h index 08ed3778e349fc20c7d4a3e276f3b19f56b950ca..d5ca1728be37635a9890dcff12bfc49d797c2a5e 100644 --- a/SequenceComparison/draw.h +++ b/SequenceComparison/draw.h @@ -4,6 +4,7 @@ #include "structs.h" #include "parameter.h" #include "help.h" +#include "calcul.h" #if defined (_MSC_VER) #pragma component(browser, off, references) @@ -43,26 +44,22 @@ public: { CImg<short> tsA; CImg<short> tsB; - - auto maxA = help::vtr_findMax(input.A); - auto maxB = help::vtr_findMax(input.B); - if (input.A[0].size() > 1) - { - tsA = draw::series_bars(input.A, std::max(maxA, maxB)).rotate(90); - tsB = draw::series_bars(input.B, std::max(maxA, maxB)); + auto max = std::max(calcul::vtr_max(input.A), calcul::vtr_max(input.B)); + auto maxMean = std::max(calcul::vtr_max(calcul::vtr_mean(input.A)), calcul::vtr_max(calcul::vtr_mean(input.B))); - //plotS1 = draw::series_dim_mean(input.A, std::max(maxA, maxB)).rotate(90); - //plotS2 = draw::series_dim_mean(input.B, std::max(maxA, maxB)); + if (input.A[0].size() > 1) { + tsA = (draw::draw_serieBars(input.A, max).append(draw::draw_serieMean(input.A, maxMean), 'y')).rotate(90); + tsB = draw::draw_serieBars(input.B, max).append(draw::draw_serieMean(input.B, maxMean), 'y'); } - else - { - tsA = draw::series(input.A, std::max(maxA, maxB)).rotate(90); - tsB = draw::series(input.B, std::max(maxA, maxB)); + else{ + tsA = draw::draw_serie(input.A, max).rotate(90); + tsB = draw::draw_serie(input.B, max); } CImg<short> m2 = result.matrix_noacc; m2 = m2.append(tsB, 'y'); + CImg<short> all; all = result.matrix_acc; all = all.append(tsA); @@ -85,85 +82,67 @@ public: matrix = matrix.append(row, 'y'); } - CImg<short> plot_tsA; - CImg<short> plot_tsB; - auto maxA = help::vtr_findMax(input.A); - auto maxB = help::vtr_findMax(input.B); + CImg<short> serieA; + CImg<short> serieB; + auto max = std::max(calcul::vtr_max(input.A), calcul::vtr_max(input.B)); if (input.A[0].size() > 1) { - plot_tsA = draw::series_bars(input.A, std::max(maxA, maxB)).rotate(90); - plot_tsB = draw::series_bars(input.B, std::max(maxA, maxB)); - - //plotS1 = draw::series_dim_mean(input.A, std::max(maxA, maxB)).rotate(90); - //plotS2 = draw::series_dim_mean(input.B, std::max(maxA, maxB)); + serieA = (draw::draw_serieBars(input.A, max).append(draw::draw_serieMean(input.A, max))).rotate(90); + serieB = draw::draw_serieBars(input.B, max).append(draw::draw_serieMean(input.B, max), 'y'); } else { - plot_tsA = draw::series(input.A, std::max(maxA, maxB)).rotate(90); - plot_tsB = draw::series(input.B, std::max(maxA, maxB)); + serieA = draw::draw_serie(input.A, max).rotate(90); + serieB = draw::draw_serie(input.B, max); } - auto all = matrix.append(plot_tsB, 'y'); - all = plot_tsA.append(all); + auto all = matrix.append(serieB, 'y'); + all = serieA.append(all); all.save(generateName(info, params.drawOut).c_str()); } - static CImg<short> matrix(vtr2<node> const &matrix, vtr<result_path> const &warpings, vtr2<range> const &ranges, parameter const ¶ms) + static CImg<short> draw_combine(vtr2<node> const &matrix, vtr<result_path> const &warpings, parameter const ¶ms) { CImg<short> img((int)matrix[0].size() - 1, (int)matrix.size() - 1, 1, 3, 0); - int lenA = (int)matrix.size() - 1; - int lenB = (int)matrix[0].size() - 1; - - double max = constant::MIN_double; + + const float green[] = { 0, 255, 0 }; - const int w = (int)(lenB * params.w); - double coef = lenB / static_cast<double>(lenA); + draw_matrix(img, matrix, params); + draw_warpings(img, warpings, green); - for (size_t i = 1; i < lenA + 1; i++) // Iterating over rows - { - size_t start = std::max(1, (int)(ceil((i - 1) * coef + 0.0000000001) - w)); - size_t end = std::min(lenB + 1, (int)(ceil(i * coef) + 1) + w); - for (size_t j = start; j < end; j++) - { - if (matrix[i][j].value > max) - max = matrix[i][j].value; - } - } + if (params.drawMin) + draw_minimums(img, matrix); + + return img; + } - vtr2<coords> pathCoords(warpings.size()); - for (int i = 0; i < warpings.size(); i++) - pathCoords[i] = warpings[i].convert_toCoords(); + static void draw_matrix(CImg<short> &img, vtr2<node> const &matrix, parameter const ¶ms) + { + double max = getMax(matrix, params); - const float red[] = { 255, 0, 0 }; - const float teal[] = { 255, 0, 0 }; - const float green[] = { 0, 255, 0 }; - - int count = 0; - for (size_t i = 1; i < matrix.size(); i++) //row//y - { + for (size_t i = 1; i < matrix.size(); i++) { //row//y for (size_t j = 1; j < matrix[i].size(); j++) //coll//x - { + { auto colorRGB = draw::getColor(0, max, matrix[i][j].value); const float color[] = { colorRGB.r, colorRGB.g, colorRGB.b }; - img.draw_point((int)j - 1, (int)i - 1, color); + img.draw_point((int)j - 1, (int)i - 1, color); } } - - for (size_t i = 0; i < pathCoords.size(); i++) - for(size_t j = 0; j < ranges[i].size(); j++) - for (size_t k = ranges[i][j].start; k < ranges[i][j].end; k++) - img.draw_point(pathCoords[i][k].col - 1, pathCoords[i][k].row - 1, green); + } - for (size_t i = 1; i < matrix.size(); i++) //row//y - { - for (size_t j = 1; j < matrix[i].size(); j++) //coll//x + static void draw_minimums(CImg<short> &img, vtr2<node> const &matrix) + { + const float red[] = { 255, 0, 0 }; + + for (size_t i = 1; i < matrix.size(); i++) { //row//y + for (size_t j = 1; j < matrix[i].size(); j++)//coll//x { double current = matrix[i][j].value; - - if (params.drawMin && i + 1 < matrix.size() && j + 1 < matrix[i].size() && + + if (i + 1 < matrix.size() && j + 1 < matrix[i].size() && matrix[i - 1][j].value > current && matrix[i - 1][j + 1].value > current && matrix[i][j - 1].value > current && matrix[i][j + 1].value > current && matrix[i + 1][j - 1].value > current && matrix[i + 1][j].value > current) { @@ -172,70 +151,60 @@ public: } } } - - return img; } - static CImg<short> matrix_min(vtr2<node> const &matrix, parameter const ¶ms) + static void draw_warpings(CImg<short> &img, vtr<result_path> const &warpings, const float *color) { - CImg<short> img((int)matrix[0].size() - 1, (int)matrix.size() - 1, 1, 3, 0); + for (size_t i = 0; i < warpings.size(); i++) + for (size_t j = 0; j < warpings[i].pathCoords.size(); j++) + img.draw_point(warpings[i].pathCoords[j].col - 1, warpings[i].pathCoords[j].row - 1, color); + } + + static double getMax(vtr2<node> const &matrix, parameter const ¶ms) + { + int lenA = (int)matrix.size() - 1; + int lenB = (int)matrix[0].size() - 1; double max = constant::MIN_double; - for (size_t i = 1; i < matrix.size(); i++) // Iterating over rows + const int w = (int)(lenB * params.w); + double coef = lenB / static_cast<double>(lenA); + + for (size_t i = 1; i < lenA + 1; i++) // Iterating over rows { - for (size_t j = 1; j < matrix[i].size(); j++) + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); + for (size_t j = start; j < end; j++) { if (matrix[i][j].value > max) max = matrix[i][j].value; } } - for (size_t i = 1; i < matrix.size(); i++) //row//y - { - for (size_t j = 1; j < matrix[i].size(); j++) //coll//x - { - double current = matrix[i][j].value; - - auto colorRGB = draw::getColor(0, max, matrix[i][j].value); - const float color[] = { colorRGB.r, colorRGB.g, colorRGB.b }; - const float red[] = { 255, 0, 0 }; - - if (params.drawMin && i + 1 < matrix.size() && j + 1 < matrix[i].size() && - matrix[i - 1][j].value > current && matrix[i - 1][j + 1].value > current && matrix[i][j - 1].value > current && - matrix[i][j + 1].value > current && matrix[i + 1][j - 1].value > current && matrix[i + 1][j].value > current){ - - img.draw_point((int)j - 1, (int)i - 1, red); - } - else - img.draw_point((int)j - 1, (int)i - 1, color); - } - } - - return img; + return max; } - static CImg<short> series(vtr2<double> const &series, double max) + static CImg<short> draw_serie(vtr2<double> const &serie, double max) { - CImg<short> img((int)series.size() /*+ 1*/, 100, 1, 3, 0); - - double mul = 99 / max; + int height = (int)ceil(serie.size() * 0.1); + CImg<short> img((int)serie.size(), height, 1, 3, 0); + double mul = (height - 1) / max; const float color[] = { 255, 0, 0 }; - for (size_t i = 0; i < series.size() - 1; i++) + for (size_t i = 0; i < serie.size() - 1; i++) { - int a = (int)(mul * series[i][0]); - int b = (int)(mul * series[i + 1][0]); + int a = (int)(mul * serie[i][0]); + int b = (int)(mul * serie[i + 1][0]); - img.draw_line((int)i /*+ 1*/, a, (int)i + 1, b, color); + img.draw_line((int)i, a, (int)i + 1, b, color); } return img.mirror('y'); } - static CImg<short> series_bars(vtr2<double> const &series, double max) + static CImg<short> draw_serieBars(vtr2<double> const &series, double max) { - int barSize = 5; + int barSize = (int)ceil(series.size() * 0.01); CImg<short> img((int)series.size(), barSize * (int)series[0].size() , 1, 3, 0); size_t dims = series[0].size(); @@ -247,36 +216,38 @@ public: color mycolor = getColor(0, max, series[j][i]); const float color[] = { mycolor.r, mycolor.g, mycolor.b }; - img.draw_line((int)j, (int)i * barSize, (int)j, (int)i * barSize, color); + img.draw_line((int)j, (int)i * barSize, (int)j, ((int)i + 1) * barSize, color); } } return img; } - static CImg<short> series_mean(vtr2<double> const &series, double max) + static CImg<short> draw_serieMean(vtr2<double> const &serie, double max) { - CImg<short> img((int)series.size(), 100, 1, 3, 0); - - double mul = 99 / max; + int height = (int)ceil(serie.size() * 0.1); + CImg<short> img((int)serie.size(), height, 1, 3, 0); + double mul = (height - 1) / max; // -2 ..means are double... so they overlap with bars + auto means = calcul::vtr_mean(serie); + const float color[] = { 255, 0, 0 }; - for (size_t i = 0; i < series.size() - 1; i++) + for (size_t i = 0; i < serie.size() - 1; i++) { - double mean1 = 0, mean2 = 0; - for (size_t j = 0; j < series[0].size(); j++) - { - mean1 += series[i][j]; - mean2 += series[i + 1][j]; - } - - mean1 = mean1 / series[i].size(); - mean2 = mean2 / series[i + 1].size(); + //double mean1 = 0, mean2 = 0; + //for (size_t j = 0; j < serie[0].size(); j++) + //{ + // mean1 += serie[i][j]; + // mean2 += serie[i + 1][j]; + //} + // + //mean1 = mean1 / serie[i].size(); + //mean2 = mean2 / serie[i + 1].size(); - int a = (int)(mul * mean1); - int b = (int)(mul * mean2); + int y1 = (int)floor(mul * means[i]); + int y2 = (int)floor(mul * means[i + 1]); - img.draw_line((int)i, a, (int)i + 1, b, color); + img.draw_line((int)i, y1, (int)i + 1, y2, color); } return img.mirror('y'); @@ -286,7 +257,6 @@ public: { std::stringstream ss; if (help::pathExists(path)) { - if (help::isFolder(path)) ss << path << "\\" @@ -318,7 +288,6 @@ public: static color getColor(double min, double max, double value) { color c; - double mul = 0; if (max != 0) @@ -368,6 +337,18 @@ public: return c; } + + }; -#endif //DRAW_H \ No newline at end of file +#endif //DRAW_H + +/*vtr2<coord> pathCoords(warpings.size()); +for (int i = 0; i < warpings.size(); i++) +pathCoords[i] = warpings[i].convert_toCoords();*/ + + +/*for (size_t i = 0; i < warpings.size(); i++) +for(size_t j = 0; j < ranges[i].size(); j++) +for (size_t k = ranges[i][j].start; k < ranges[i][j].end; k++) +img.draw_point(warpings[i].pathCoords[k].col - 1, warpings[i].pathCoords[k].row - 1, green);*/ \ No newline at end of file diff --git a/SequenceComparison/dtw.cpp b/SequenceComparison/dtw.cpp index b8a200292b5520aa345870cfc8ba6792725a8d15..af1a8dae1498f10c359b236f8cdd444fb9fc4585 100644 --- a/SequenceComparison/dtw.cpp +++ b/SequenceComparison/dtw.cpp @@ -12,17 +12,14 @@ using namespace std; #undef min #undef max -/* -sumary: entry point function for dtw method -parameters: -input_method - input for the dtw method -input_info - info about input data -parameter - parameter for dtw method -*/ - -vtr<double> dtw::main(input_method const &input, input_info const &info, parameter const ¶ms) +///sumary: entry point function for dtw method +///parameters: +///input_method - input for the dtw method +///input_info - info about input data +///parameter - parameter for dtw method +result_dtw dtw::main(input_method const &input, input_info const &info, parameter const ¶ms) { - vtr<double> result; + result_dtw result; if (params.segmented) result = main_segment(input, info, params); @@ -32,14 +29,12 @@ vtr<double> dtw::main(input_method const &input, input_info const &info, paramet return result; } -/* -sumary: entry point function for dtw method -parameters: -input_method - input for the dtw method -input_info - info about input data -parameter - parameter for dtw method -*/ -vtr<double> dtw::main_pair(input_method const &input, input_info const &info, parameter const ¶ms) +///sumary: entry point function for dtw method +///parameters: +///input_method - input for the dtw method +///input_info - info about input data +///parameter - parameter for dtw method +result_dtw dtw::main_pair(input_method const &input, input_info const &info, parameter const ¶ms) { if ((int)((input.A.size() * input.B.size()) / 131072) > params.ram) //131072 to convert bytes to MB { @@ -58,20 +53,18 @@ vtr<double> dtw::main_pair(input_method const &input, input_info const &info, pa if (params.isRatioReversed()) { for (size_t i = 0; i < result.score.size(); i++) - result.score[params.scoreType - 1] = 1 - result.score[params.scoreType - 1]; + result.score[params.scoreType] = 1 - result.score[params.scoreType]; } - return result.score; + return result; } -/* -sumary: entry point function for dtw method -parameters: -input_method - input for the dtw method -input_info - info about input data -parameter - parameter for dtw method -*/ -vtr<double> dtw::main_segment(input_method const &input, input_info const &info, parameter const ¶ms) +///sumary: entry point function for dtw method +///parameters: +///input_method - input for the dtw method +///input_info - info about input data +///parameter - parameter for dtw method +result_dtw dtw::main_segment(input_method const &input, input_info const &info, parameter const ¶ms) { //segmentig int win = params.segmented + 1; @@ -116,11 +109,9 @@ vtr<double> dtw::main_segment(input_method const &input, input_info const &info, for (size_t i = 0; i < result.size(); i++) for (size_t j = 0; j < result[i].size(); j++) result[i][j].score[j] -= 1; - - return result[0][0].score; } - else - return result[0][0].score; + + return result[0][0]; } /* @@ -140,9 +131,9 @@ result_dtw dtw::configure(input_method const &input, input_info const &info, par else if (params.distance == 2) d.classic = calcul::distance_dtw_manhattan; else if (params.distance == 3) - d.csi_chroma = calcul::distance_dtw_csi_chroma; + d.csi_chroma = calcul::distance_dtw_csiChroma; else if (params.distance == 4) - d.csi_chord = calcul::distance_dtw_csi_chord; + d.csi_chord = calcul::distance_dtw_csiChord; else if (params.distance == 5) d.classic = calcul::distance_dtw_euklid_mean; @@ -166,14 +157,12 @@ result_dtw dtw::configure(input_method const &input, input_info const &info, par return result; } -/* -sumary: entry point function for dtw method -parameters: -input_method - input for the dtw method -input_info - info about input data -DISTANCE - pointer to function used for distance calculations -parameter - parameter for dtw method -*/ +///sumary: entry point function for dtw method +///parameters: +///input_method - input for the dtw method +///input_info - info about input data +///DISTANCE - pointer to function used for distance calculations +///parameter - parameter for dtw method result_dtw dtw::alignment(input_method const &input, input_info const &info, DISTANCE distance, parameter const ¶ms) { auto m = dtw::matrix(input, distance, params); @@ -191,54 +180,69 @@ result_dtw dtw::alignment(input_method const &input, input_info const &info, DIS result_dtw result; if (params.drawOut.size() > 0) { - result.matrix_acc = draw::matrix(m, warping, ranges, params); - result.matrix_noacc = draw::matrix(dtw::matrix_noaccumulation(input, distance, params), warping, ranges, params); - //draw::visualisation<T>(m, dtw::matrix_noaccumulation<T>(input, distance, params), input, warping.path, params.drawOut); - //m, m2, s1, s2, wp, filep + result.matrix_acc = draw::draw_combine(m, warping, params); + result.matrix_noacc = draw::draw_combine(dtw::matrix_noaccumulation(input, distance, params), warping, params); } if (params.isDebugInfo()) { - //cout << endl << print::distanceMatrix(m); + cout << endl << print::distanceMatrix(m); cout << endl << warping[0].path; //cout << endl << print::printPathShape(back.path, end, (int)A.size() + 1, (int)B.size() + 1); //print::write(print::printHtmlDistanceMatrix<T>(m), "c:\\code\\data\\sc\\dm.html", false); } - vtr<double> resultScore; - resultScore.push_back(warping[0].scoreRaw); - resultScore.push_back(calcul::score_dtw_s2(warping[0].scoreRaw, warping[0].path.size())); // back.path.size()); - resultScore.push_back(calcul::score_dtw_s3(warping[0].end.row - warping[0].start.row, warping[0].end.col - warping[0].start.col, warping[0].path.size())); - resultScore.push_back(calcul::score_dtw_s4(warping[0].scoreRaw, calcul::score_dtw_max(input.A, input.B, warping[0].start, warping[0].end))); - resultScore.push_back(calcul::score_dtw_s5(warping[0].scoreRaw, - calcul::score_dtw_max(input.A, input.B, warping[0].start, warping[0].end), - calcul::lenRatio(warping[0].end.row - warping[0].start.row, warping[0].end.col - warping[0].start.row))); - - result.score = resultScore; - result.path = warping[0].path; + //vtr<double> resultScore; + //resultScore.push_back(warping[0].scoreRaw); + //resultScore.push_back(calcul::score_dtw_s2(warping[0].scoreRaw, warping[0].path.size())); // back.path.size()); + //resultScore.push_back(calcul::score_dtw_s3(warping[0].end.row - warping[0].start.row, warping[0].end.col - warping[0].start.col, warping[0].path.size())); + //resultScore.push_back(calcul::score_dtw_s4(warping[0].scoreRaw, calcul::score_dtw_max(input.A, input.B, warping[0].start, warping[0].end))); + //resultScore.push_back(calcul::score_dtw_s5(warping[0].scoreRaw, + // calcul::score_dtw_max(input.A, input.B, warping[0].start, warping[0].end), + // calcul::lenRatio(warping[0].end.row - warping[0].start.row, warping[0].end.col - warping[0].start.row))); + + result.score = score(input, warping); + result.warpings = warping; return result; } +vtr<double> dtw::score(input_method const &input, vtr<result_path> const &warpings) +{ + vtr<double> score(5); + for (auto &i : warpings) { + score[0] += i.scoreRaw; + score[1] += calcul::score_dtw_s2(i.scoreRaw, i.path.size()); // back.path.size()); + score[2] += calcul::score_dtw_s3(i.end.row - i.start.row, i.end.col - i.start.col, i.path.size()); + score[3] += calcul::score_dtw_s4(i.scoreRaw, calcul::score_dtw_max(input.A, input.B, i.start, i.end)); + score[4] += calcul::score_dtw_s5(i.scoreRaw, + calcul::score_dtw_max(input.A, input.B, i.start, i.end), + calcul::lenRatio(i.end.row - i.start.row, i.end.col - i.start.row)); + } + + for (auto &i : score) + i /= warpings.size(); + + return score; +} + result_dtw dtw::alignment_local(input_method const &input, input_info const &info, DISTANCE distance, parameter const ¶ms) { auto m = dtw::matrix_noaccumulation(input, distance, params); - auto minims = dtw::get_minimums(m, params); + auto mMin = dtw::get_minimums(m, params); - for (auto &i : minims) + for (auto &i : mMin) m[i.row][i.col] = 0; - dtw::accumulate_mod(m, minims, params); - - auto warpings = get_warpings(m, minims, params); - auto ranges = filterPaths(m, warpings, params); + dtw::accumulate_mod(m, mMin, params); result_dtw result; + result.warpings = get_warpings(m, mMin, params); + filterPaths(m, result.warpings, params); if (params.drawOut.size() > 0) { - result.matrix_acc = draw::matrix(m, warpings, ranges, params); - result.matrix_noacc = draw::matrix(dtw::matrix_noaccumulation(input, distance, params), warpings, ranges, params); - //draw::visualisation<T>(m, dtw::matrix_noaccumulation<T>(input, distance, params), input, warping.path, params.drawOut); + result.matrix_acc = draw::draw_combine(m, result.warpings/*, ranges*/, params); + result.matrix_noacc = draw::draw_combine(dtw::matrix_noaccumulation(input, distance, params), result.warpings, params); } if (params.isDebugInfo()) { @@ -248,33 +252,28 @@ result_dtw dtw::alignment_local(input_method const &input, input_info const &inf //print::write(print::printHtmlDistanceMatrix<T>(m), "c:\\code\\data\\sc\\dm.html", false); } - vtr<double> resultScore(5); - - for (auto &i : warpings) { - resultScore[0] += i.scoreRaw; - resultScore[1] += calcul::score_dtw_s2(i.scoreRaw, i.path.size()); // back.path.size()); - resultScore[2] += calcul::score_dtw_s3(i.end.row - i.start.row, i.end.col - i.start.col, i.path.size()); - resultScore[3] += calcul::score_dtw_s4(i.scoreRaw, calcul::score_dtw_max(input.A, input.B, i.start, i.end)); - resultScore[4] += calcul::score_dtw_s5(i.scoreRaw, calcul::score_dtw_max(input.A, input.B, i.start, i.end), - calcul::lenRatio(i.end.row - i.start.row, i.end.col - i.start.row)); - } + //result.score = vtr<double>(5); + //for (auto &i : result.warpings) { + // result.score[0] += i.scoreRaw; + // result.score[1] += calcul::score_dtw_s2(i.scoreRaw, i.path.size()); // back.path.size()); + // result.score[2] += calcul::score_dtw_s3(i.end.row - i.start.row, i.end.col - i.start.col, i.path.size()); + // result.score[3] += calcul::score_dtw_s4(i.scoreRaw, calcul::score_dtw_max(input.A, input.B, i.start, i.end)); + // result.score[4] += calcul::score_dtw_s5(i.scoreRaw, calcul::score_dtw_max(input.A, input.B, i.start, i.end), + // calcul::lenRatio(i.end.row - i.start.row, i.end.col - i.start.row)); + //} - for (auto &i : resultScore) - i /= warpings.size(); - - result.score = resultScore; - //result.path = warping.path; + //for (auto &i : result.score) + // i /= result.warpings.size(); + result.score = score(input, result.warpings); return result; } -/* -sumary: entry point function for dtw method -parameters: -input_method - input for the dtw method -DISTANCE - pointer to function used for distance calculations -parameter - parameter for dtw method -*/ +///sumary: entry point function for dtw method +///parameters: +///input_method - input for the dtw method +///DISTANCE - pointer to function used for distance calculations +///parameter - parameter for dtw method vtr2<node> dtw::matrix(input_method const &input, DISTANCE distance, parameter const ¶ms) { int lenA = (int)input.A.size(); @@ -301,12 +300,12 @@ vtr2<node> dtw::matrix(input_method const &input, DISTANCE distance, parameter c m[i][0].value = 0; } + const double coef = lenB / static_cast<double>(lenA); const int w = (int)(lenB * params.w); - double coef = lenB / static_cast<double>(lenA); for (int i = 1; i < lenA + 1; i++) //row - y { - size_t start = max(1, (int)(ceil((i - 1) * coef + 0.0000000001) - w)); - size_t end = min(lenB + 1, (int)(ceil(i * coef) + 1) + w); + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (size_t j = start; j < end; j++) //col - x { const double u = m[i - 1][j].value; @@ -327,10 +326,8 @@ vtr2<node> dtw::matrix(input_method const &input, DISTANCE distance, parameter c return m; } -/* -sumary: -parameters: -*/ +///sumary: +///parameters: vtr2<node> dtw::matrix_noaccumulation(input_method const &input, DISTANCE distance, parameter const ¶ms) { int lenA = (int)input.A.size(); @@ -357,11 +354,12 @@ vtr2<node> dtw::matrix_noaccumulation(input_method const &input, DISTANCE distan m[i][0].value = 0; } + const double coef = lenB / (double)lenA; const int w = (int)(lenB * params.w); for (int i = 1; i < lenA + 1; i++) //row = y { - const size_t start = max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const size_t end = min(lenB + 1, (int)(ceil(i * lenB / (double)lenA) + 1) + w); + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (size_t j = start; j < end; j++) //col = x { if (params.distance == 3) @@ -381,11 +379,12 @@ void dtw::accumulate(vtr2<node> &m, parameter const ¶ms) int lenA = (int)m.size(); int lenB = (int)m[0].size(); + const double coef = lenB / (double)lenA; const int w = (int)(lenB * params.w); for (int i = 1; i < lenA; i++) //row = y { - const size_t start = std::max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const size_t end = std::min(lenB, (int)(ceil(i * lenB / (double)lenA)) + w); + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (size_t j = start; j < end; j++) //col = x { m[i][j].value += std::min({ m[i - 1][j - 1].value, m[i - 1][j].value, m[i][j - 1].value }); @@ -393,19 +392,17 @@ void dtw::accumulate(vtr2<node> &m, parameter const ¶ms) } } -void dtw::accumulate_mod(vtr2<node> &m, vtr<coords> const &minims, parameter const ¶ms) +void dtw::accumulate_mod(vtr2<node> &m, vtr<coord> const &minims, parameter const ¶ms) { int lenA = (int)m.size(); int lenB = (int)m[0].size(); - /*for (auto &i : minims) - m[i.row][i.col] = 0;*/ - + const double coef = lenB / (double)lenA; const int w = (int)(lenB * params.w); for (int i = 1; i < lenA; i++) //row = y { - const size_t start = std::max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const size_t end = std::min(lenB, (int)(ceil(i * lenB / (double)lenA)) + w); + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (size_t j = start; j < end; j++) //col = x { if(m[i][j].value != 0) @@ -414,18 +411,19 @@ void dtw::accumulate_mod(vtr2<node> &m, vtr<coords> const &minims, parameter con } } -vtr<coords> dtw::get_minimums(vtr2<node> const &m, parameter const ¶ms) +vtr<coord> dtw::get_minimums(vtr2<node> const &m, parameter const ¶ms) { - vtr<coords> minims; + vtr<coord> minims; int lenA = (int)m.size(); int lenB = (int)m[0].size(); + const double coef = lenB / (double)lenA; const int w = (int)(lenB * params.w); for (int i = 1; i < lenA; i++) //row = y { - const int start = std::max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const int end = std::min(lenB, (int)(ceil(i * lenB / (double)lenA)) + w); + const int start = calcul::dtw_wpassStart(i, w, coef); + const int end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (int j = start; j < end; j++) //col = x { double current = m[i][j].value; @@ -434,13 +432,13 @@ vtr<coords> dtw::get_minimums(vtr2<node> const &m, parameter const ¶ms) m[i - 1][j].value > current && m[i - 1][j + 1].value > current && m[i][j - 1].value > current && m[i][j + 1].value > current && m[i + 1][j - 1].value > current && m[i + 1][j].value > current) { - minims.push_back(coords(i, j)); + minims.push_back(coord(i, j)); j++; } } } - vtr<coords> filtered; + vtr<coord> filtered; for (size_t i = 0; i < minims.size(); i++) { for (size_t j = i + 1; j < minims.size(); j++) @@ -461,88 +459,92 @@ vtr<coords> dtw::get_minimums(vtr2<node> const &m, parameter const ¶ms) return filtered; } -/* -sumary: -parameters: -vtr2<node<T>> - -i -j -params -*/ -result_path dtw::get_warping(vtr2<node> const &m, coords coord, parameter const ¶ms) +///sumary: +///parameters: +///vtr2<node<T>> - +///i +///j +///params +result_path dtw::get_warping(vtr2<node> const &m, coord coords, parameter const ¶ms) { - result_path wp; - double sizeA = coord.row; - double sizeB = coord.col; - - wp.pathCoords.push_back(coords(coord.row, coord.col)); - - while (coord.row > 0 && coord.col > 0) + result_path warping; + double lenA = coords.row; + double lenB = coords.col; + + const bool flexible = params.isPassFlexible(); + while (coords.row > 0 && coords.col > 0) { - double u = m[coord.row - 1][coord.col].value; - double l = m[coord.row][coord.col - 1].value; - double d = m[coord.row - 1][coord.col - 1].value; + warping.pathCoords.push_back(coord(coords.row, coords.col)); + + double d = 0, u = 0, l = 0; + if (flexible) { + coord past = (int)warping.pathCoords.size() - params.d >= 0 ? warping.pathCoords[warping.pathCoords.size() - params.d] : coord(coords.row, coords.col); + d = calcul::dtw_isFlexiblePass(coords.row - 1, coords.col - 1, past.row, past.col, params.wd, params.d) ? m[coords.row - 1][coords.col - 1].value : constant::MAX_double; + u = calcul::dtw_isFlexiblePass(coords.row - 1, coords.col, past.row, past.col, params.wd, params.d) ? m[coords.row - 1][coords.col].value : constant::MAX_double; + l = calcul::dtw_isFlexiblePass(coords.row, coords.col - 1, past.row, past.col, params.wd, params.d) ? m[coords.row][coords.col - 1].value : constant::MAX_double; + } + else{ + d = m[coords.row - 1][coords.col - 1].value; + u = m[coords.row - 1][coords.col].value; + l = m[coords.row][coords.col - 1].value; + } - if (min({ d, u, l }) == d) - { - wp.path = "M" + wp.path; - coord.row--; - coord.col--; - wp.pathCoords.push_back(coords(coord.row, coord.col)); + if (min({ d, u, l }) == d){ + warping.path = "M" + warping.path; + coords.row--; + coords.col--; } else { - if (l < u) - { - wp.path = "L" + wp.path; - coord.col--; - wp.pathCoords.push_back(coords(coord.row, coord.col)); + if (l < u){ + warping.path = "L" + warping.path; + coords.col--; } - else if (u < l) - { - wp.path = "U" + wp.path; - coord.row--; - - - + else if (u < l){ + warping.path = "U" + warping.path; + coords.row--; } else { - if(sizeA / coord.row > sizeB / coord.col) + if(lenA / coords.row > lenB / coords.col) { - wp.path = "L" + wp.path; - coord.col--; + warping.path = "L" + warping.path; + coords.col--; } - else - { - wp.path = "U" + wp.path; - coord.row--; + else{ + warping.path = "U" + warping.path; + coords.row--; } } } + //warping.pathCoords.push_back(coord(coords.row, coords.col)); } if (!params.isSubsequence() || (params.isSubsequence() && m.size() < m[0].size())) - while (coord.row > params.relax) + while (coords.row > params.relax) { - wp.path = "U" + wp.path; - coord.row--; + warping.path = "U" + warping.path; + coords.row--; + warping.pathCoords.push_back(coord(coords.row, coords.col)); } if (!params.isSubsequence() || (params.isSubsequence() && m[0].size() < m.size())) - while (coord.col > params.relax) + while (coords.col > params.relax) { - wp.path = "L" + wp.path; - coord.col--; + warping.path = "L" + warping.path; + coords.col--; + warping.pathCoords.push_back(coord(coords.row, coords.col)); } - wp.start.row = coord.row; - wp.start.col = coord.col; + warping.start.row = coords.row; // start is upper left...so its where path is finished building(sort of end) + warping.start.col = coords.col; + + std::reverse(warping.pathCoords.begin(), warping.pathCoords.end()); - return wp; + return warping; } -vtr<result_path> dtw::get_warpings(vtr2<node> const &m, vtr<coords> const &minims, parameter const ¶ms) +vtr<result_path> dtw::get_warpings(vtr2<node> const &m, vtr<coord> const &minims, parameter const ¶ms) { vtr<result_path> outPaths; @@ -555,32 +557,29 @@ vtr<result_path> dtw::get_warpings(vtr2<node> const &m, vtr<coords> const &minim return outPaths; } -vtr2<range> dtw::filterPaths(vtr2<node> const &m, vtr<result_path> const &wpaths, parameter const ¶ms) +void dtw::filterPaths(vtr2<node> const &m, vtr<result_path> &warpings, parameter const ¶ms) { - vtr2<range> ranges(wpaths.size()); - map<coords, bool> within2; + vtr2<range> ranges(warpings.size()); - vtr2<coords> wCoord(wpaths.size()); - for (size_t i = 0; i < wpaths.size(); i++) - wCoord[i] = wpaths[i].convert_toCoords(); - - for (int k = 0; k < (int)wpaths.size(); k++) - for (int i = 0; i < (int)wpaths[k].path.size(); i++) + for (int k = 0; k < (int)warpings.size(); k++) + for (int i = 0; i < (int)warpings[k].path.size(); i++) { double cTotal = 0; double max = constant::MIN_double; range rangeTmp = range(-1, 0); bool insert = true; bool accept = false; - for (int j = i; j < (int)wpaths[k].path.size(); j++) + + if ((int)warpings[k].path.size() >= params.treshold_l) { + for (int j = i; j < (int)warpings[k].path.size(); j++) { - double current = m[wCoord[k][j].row][wCoord[k][j].col].value - cTotal; + int len = j - i + 1; + double current = m[warpings[k].pathCoords[j].row][warpings[k].pathCoords[j].col].value - cTotal; cTotal += current; if (max < current) max = current; - - if (max <= params.treshold_e /*&& cTotal <= params.treshold_t*/ && cTotal / (j - i + 1) <= params.treshold_a && (j - i + 1) >= params.treshold_l) + if (max <= params.treshold_e /*&& cTotal <= params.treshold_t*/ && cTotal / len <= params.treshold_a && len >= params.treshold_l) { accept = true; @@ -594,42 +593,45 @@ vtr2<range> dtw::filterPaths(vtr2<node> const &m, vtr<result_path> const &wpaths } } } + } - if (accept) //filtr for excluding subsequences // true subsequences - { - for (auto &i : ranges[k]) - if (i.start <= rangeTmp.start && rangeTmp.end <= i.end) - accept = false; - else if (rangeTmp.start - i.end < 2) { - i.end = rangeTmp.end; - accept = false; - break; - } - } - - if (accept) - ranges[k].push_back(rangeTmp); + if (accept) //filtr for excluding subsequences // true subsequences + { + for (auto &i : ranges[k]) + if (i.start <= rangeTmp.start && rangeTmp.end <= i.end) + accept = false; + else if (rangeTmp.start - i.end < 2) { + i.end = rangeTmp.end; + accept = false; + break; + } } - /*for (int i = 0; i < (int)ranges.size(); i++) + if (accept) + ranges[k].push_back(rangeTmp); + } + + vtr<result_path> result; + for (size_t i = 0; i < warpings.size(); i++) { - for (int j = 0; j < (int)ranges[i].size(); j++) - { - if() + result_path warp; + if (!ranges[i].empty()) { + warp = warpings[i]; + warp.path = warpings[i].path.substr(ranges[i][0].start, ranges[i][0].end - ranges[i][0].start + 1); + warp.pathCoords = vtr<coord>(warpings[i].pathCoords.begin() + ranges[i][0].start, warpings[i].pathCoords.begin() + ranges[i][0].end + 1); + result.push_back(warp); } - }*/ - - return ranges; + } + + warpings = result; } -/* -sumary: -parameters: -*/ -coords dtw::get_relaxedEnds(vtr2<node> const &m, parameter const ¶ms) +///sumary: +///parameters: +coord dtw::get_relaxedEnds(vtr2<node> const &m, parameter const ¶ms) { double min = constant::MAX_double; - coords coordMin; + coord coordMin; int lenA = (int)m.size() - 1; int lenB = (int)m[0].size() - 1; @@ -668,10 +670,8 @@ coords dtw::get_relaxedEnds(vtr2<node> const &m, parameter const ¶ms) return coordMin; } -/* -sumary: -parameters: -*/ +///sumary: +///parameters: result_path dtw::matrix_tiled(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { vtr2<node> m(A.size() + 1); @@ -699,8 +699,7 @@ result_path dtw::matrix_tiled(vtr2<double> const &A, vtr2<double> const &B, para //const int w = (int)(B.size() * params.w); for (size_t i = 0; i < m.size(); i += block) //row - y { - //size_t start = max(1, (int)(ceil((i - 1) * (static_cast<double>(B.size()) / (double)A.size() + 0.0000000001)) - w)); - //size_t end = min((int)B.size() + 1, (int)(ceil(i * B.size() / (double)A.size()) + 1) + w); + for (size_t j = 0; j < m[0].size(); j += block) //col - x { size_t iis = i + 1; @@ -742,28 +741,18 @@ result_path dtw::matrix_tiled(vtr2<double> const &A, vtr2<double> const &B, para return back; } -/* -sumary: -parameters: -*/ +///sumary: +///parameters: result_path dtw::matrix_memoized(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { vtr<node2<double>> m1(B.size() + 1); vtr<node2<double>> m2(B.size() + 1); - - //double coef = A.size() / (double)B.size(); - m1[0].value = 0; - //m1[0].path.reserve(A.size() + B.size()); - - string path = ""; - path.reserve(A.size() + B.size()); - //const int w = (int)(B.size() * params.w); - for (size_t i = 1; i < A.size() + 1; i++) //row - y + for (size_t i = 1; i < A.size() + 1; i++) //row y { - //path = ""; - for (size_t j = 1; j < B.size() + 1; j++) //row - y + m2[i].pa.resize(A.size() + B.size()); + for (size_t j = 1; j < B.size() + 1; j++) //col x { const double u = m1[j].value; const double d = m1[j - 1].value; @@ -774,9 +763,10 @@ result_path dtw::matrix_memoized(vtr2<double> const &A, vtr2<double> const &B, p int pathSize = 0; if (min == d){ - //path = std::move(m1[j - 1].path); - //path.push_back('M'); - pathSize = m1[j - 1].pathSize + 1; + m2[j].pa = std::move(m1[j - 1].pa); + m2[j].pa[m2[j].pathSize * 2] = 0; + m2[j].pa[m2[j].pathSize * 2 + 1] = 0; + m2[j].pathSize = m1[j - 1].pathSize + 1; } else if (u < l) to = 1; @@ -802,27 +792,29 @@ result_path dtw::matrix_memoized(vtr2<double> const &A, vtr2<double> const &B, p if(to == 1) { - //path.append(m1[j].path + "U"); - //path.push_back('U'); - pathSize = m1[j].pathSize + 1; + m2[j].pa = m1[j].pa; + m2[j].pa[m2[j].pathSize * 2] = 0; + m2[j].pa[m2[j].pathSize * 2 + 1] = 1; + m2[j].pathSize = m1[j].pathSize + 1; } else if (to == 2) { - //path.append(m2[j - 1].path + "L"); - //path.push_back('L'); - pathSize = m2[j - 1].pathSize + 1; + m2[j].pa = m1[j - 1].pa; + m2[j].pa[m2[j].pathSize * 2] = 1; + m2[j].pa[m2[j].pathSize * 2 + 1] = 0; + m2[j].pathSize = m2[j - 1].pathSize + 1; } - //m2[j].path = std::move(path); - m2[j].pathSize = pathSize; m2[j].value = calcul::distance_dtw_euklid(A[i - 1], B[j - 1]) + min; } + /*m1 = move(m2); + m2.resize(B.size() + 1); + m2[0].value = constant::MAX_double;*/ + m1.swap(m2); m1[0].value = constant::MAX_double; - m2.swap(m1); } result_path back; - //back.path = m1[m2.size() - 1].path; back.scoreRaw = m1[m2.size() - 1].value; back.start.col = 0; back.start.row = 0; @@ -832,10 +824,8 @@ result_path dtw::matrix_memoized(vtr2<double> const &A, vtr2<double> const &B, p return back; } -/* -sumary: -parameters: -*/ +///sumary: +///parameters: result_path dtw::matrix_diagonal(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { size_t simds = A[0].size() / 4; @@ -981,10 +971,8 @@ result_path dtw::matrix_diagonal(vtr2<double> const &A, vtr2<double> const &B, p return back; } -/* -sumary: -parameters: -*/ +///sumary: +///parameters: result_path dtw::matrix_simd(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms) { size_t simds = A[0].size() / 4; @@ -1130,4 +1118,63 @@ result_path dtw::matrix_simd(vtr2<double> const &A, vtr2<double> const &B, param back.end.row = (int)A.size(); return back; -} \ No newline at end of file +} + +vtr2<double> dtw::tserie_merge(vtr2<double> const &A, vtr2<double> const &B, result_path const &warping) +{ + vtr2<double> result; + vtr<double> tmpPoint(A[0].size()); + //warping.convert_toCoords(); + + for (size_t i = 0; i < A[0].size(); i++) + tmpPoint[i] = (A[0][i] + B[0][i]) / 2; + result.push_back(tmpPoint); + + for (size_t i = 1; i < warping.pathCoords.size(); i++) + { + if (warping.pathCoords[i].row - warping.pathCoords[i - 1].row == 0 || warping.pathCoords[i].col - warping.pathCoords[i - 1].col == 0) + continue; + else + for (size_t k = 0; k < A[0].size(); k++) + tmpPoint[k] = (A[warping.pathCoords[i].row - 1][k] + B[warping.pathCoords[i].col - 1][k]) / 2; + + result.push_back(tmpPoint); + } + + return result; +} + +//vtr2<double> dtw::tserie_merge(vtr3<double> const &tset, result_path const &warping) +//{ +// vtr2<double> result; +// warping.convert_toCoords(); +// +// for (size_t i = 0; i < warping.pathCoords.size(); i++) +// { +// +// } +// +// return result; +//} + +//vtr<tspoint> dtw::tserie_merge(vtr<tspoint> const &A, vtr<tspoint> const &B, result_path const &warping) +//{ +// vtr<tspoint> result; +// tspoint tmp(A[0].size()); +// //warping.convert_toCoords(); +// +// tmp = (A[0] + B[0]) / 2; +// result.push_back(tmp); +// +// for (size_t i = 1; i < warping.pathCoords.size(); i++) +// { +// if (warping.pathCoords[i].row - warping.pathCoords[i - 1].row == 0 || warping.pathCoords[i].col - warping.pathCoords[i - 1].col == 0) +// continue; +// else +// tmp = (A[warping.pathCoords[i].row] + B[warping.pathCoords[i].col]) / 2; +// +// result.push_back(tmp); +// } +// +// return result; +//} \ No newline at end of file diff --git a/SequenceComparison/dtw.h b/SequenceComparison/dtw.h index 7523c6ad7df43412a74d5061a1c4b91d0f40c109..9037d6f0deaf97fe8a249360a9dcef841ce81762 100644 --- a/SequenceComparison/dtw.h +++ b/SequenceComparison/dtw.h @@ -9,43 +9,38 @@ class dtw { public: - static vtr<double> main(input_method const &input, input_info const &info, parameter const ¶ms); - static vtr<double> main_pair(input_method const &input, input_info const &info, parameter const ¶ms); - static vtr<double> main_segment(input_method const &input, input_info const &info, parameter const ¶ms); - + static result_dtw main(input_method const &input, input_info const &info, parameter const ¶ms); + static result_dtw main_pair(input_method const &input, input_info const &info, parameter const ¶ms); + static result_dtw main_segment(input_method const &input, input_info const &info, parameter const ¶ms); + static vtr<double> score(input_method const &input, vtr<result_path> const &warpings); static result_dtw configure(input_method const &input, input_info const &info, parameter const ¶ms); static result_dtw alignment(input_method const &input, input_info const &info, DISTANCE distance, parameter const ¶ms); static result_dtw alignment_local(input_method const &input, input_info const &info, DISTANCE distance, parameter const ¶ms); static vtr2<node> matrix(input_method const &input, DISTANCE d, parameter const ¶ms); - static vtr2<node> matrix_noaccumulation(input_method const &input, DISTANCE distance, parameter const ¶ms); - static vtr<coords> get_minimums(vtr2<node> const &m, parameter const ¶ms); - - static void accumulate(vtr2<node> &m, parameter const ¶ms); - - static void accumulate_mod(vtr2<node> &m, vtr<coords> const &minims, parameter const ¶ms); - - static result_path get_warping(vtr2<node> const &m, coords coord, parameter const ¶ms); - - static vtr<result_path> get_warpings(vtr2<node> const &m, vtr<coords> const &minims, parameter const ¶ms); - - static vtr2<range> filterPaths(vtr2<node> const &m, vtr<result_path> const &wpaths, parameter const ¶ms); - - static coords get_relaxedEnds(vtr2<node> const &m, parameter const ¶ms); - - //experimental functions + //experimental matrix calculation static result_path matrix_tiled(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms); - static result_path matrix_memoized(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms); - static result_path matrix_diagonal(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms); - static result_path matrix_simd(vtr2<double> const &A, vtr2<double> const &B, parameter const ¶ms); - + //acumulate matrix from noaccumulation matrix + static void accumulate(vtr2<node> &m, parameter const ¶ms); + static void accumulate_mod(vtr2<node> &m, vtr<coord> const &minims, parameter const ¶ms); + + static result_path get_warping(vtr2<node> const &m, coord coord, parameter const ¶ms); + static vtr<result_path> get_warpings(vtr2<node> const &m, vtr<coord> const &minims, parameter const ¶ms); + static coord get_relaxedEnds(vtr2<node> const &m, parameter const ¶ms); + + static vtr<coord> get_minimums(vtr2<node> const &m, parameter const ¶ms); + static void filterPaths(vtr2<node> const &m, vtr<result_path> &wpaths, parameter const ¶ms); + + static vtr2<double> tserie_merge(vtr2<double> const &A, vtr2<double> const &B, result_path const &warping); + //static vtr<tspoint> tserie_merge(vtr<tspoint> const &A, vtr<tspoint> const &B, result_path const &warping); + //static vtr2<double> tserie_merge(vtr3<double> const &tset, result_path const &warping); }; #endif // DTW_H \ No newline at end of file diff --git a/SequenceComparison/entrypoint.h b/SequenceComparison/entrypoint.h index 6ef9ef8a8bb241ed00a7c2a158b590e01e99f036..61c7cda5cc3ba9e34a8085ed9448f738f0df9cf1 100644 --- a/SequenceComparison/entrypoint.h +++ b/SequenceComparison/entrypoint.h @@ -49,9 +49,9 @@ EXPORT void lib_entrypoint_cmd(char* argv) { std::string strargv(argv); vtr<std::string> args = help::split(strargv, " "); - auto params = parameter::setParameters(args); + //auto params = parameter::setParameters(args); - mains::main_logic(params); + mains::master(args); } EXPORT double lib_entrypoint_pair1d(double* ts1, double* ts2, int len1, int len2, char* argv) @@ -64,9 +64,9 @@ EXPORT double lib_entrypoint_pair1d(double* ts1, double* ts2, int len1, int len2 input[0] = help::convert_arrd(ts1, len1); input[1] = help::convert_arrd(ts2, len2); - auto r = mains::extern_logic(input, args); + auto result = mains::extern_logic(input, args); - return r.scoreMethod[0]; + return result.dtw.score[0]; } EXPORT double lib_entrypoint_pairmd(double* ts1, double* ts2, int len1, int len2, int dims, char* argv) @@ -86,11 +86,11 @@ EXPORT double lib_entrypoint_pairmd(double* ts1, double* ts2, int len1, int len2 std::cout << std::endl; }*/ - auto r = mains::extern_logic(input, args); + auto result = mains::extern_logic(input, args); //std::cout << r.scoreMethod << std::endl; - return r.scoreMethod[0]; + return result.dtw.score[0]; } EXPORT double* lib_entrypoint_arr() diff --git a/SequenceComparison/help.cpp b/SequenceComparison/help.cpp index ccbf1e2ba8a8e671b57787f9a479b49af9ab361a..a7e041ccc9d96662d066c15165006a472d86514b 100644 --- a/SequenceComparison/help.cpp +++ b/SequenceComparison/help.cpp @@ -210,107 +210,6 @@ vtr3<T> help::vtr_initPartial(size_t size1, size_t size2) } template vtr3<double> help::vtr_initPartial<double>(size_t size1, size_t size2); -template <typename T> -T help::vtr_findMax(vtr2<T> const &input) -{ - double max = constant::MIN_double; - - for (auto &&i : input) - for (auto &&j : i) - if (j > max) - max = j; - - return max; -} -template double help::vtr_findMax<double>(vtr2<double> const &input); - -template <typename T> -T help::vtr_findMax(vtr3<T> const &input) -{ - double max = constant::MIN_double; - - for (auto &i : input) { - auto tmp = vtr_findMax(i); - - if (tmp > max) - max = tmp; - } - - return max; -} - -template <typename T> -T help::vtr_findMin(vtr2<T> const &input) -{ - double min = constant::MAX_double; - - for (auto &&i : input) - for (auto &&j : i) - if (j < min) - min = j; - - return min; -} -template double help::vtr_findMin<double>(vtr2<double> const &input); - -void help::normalizeMany(vtr3<double> &input) -{ - for (size_t i = 0; i < input.size(); i++) //dims - { - normalize(input[i]); - } -} - -void help::normalize(vtr2<double> &input) -{ - for (size_t i = 0; i < input[0].size(); i++) //dims - { - double mean = 0; - for (size_t j = 0; j < input.size(); j++) //lenght of sequence - { - mean += input[j][i]; - } - mean = mean / (double)input.size(); - //mean = abs(mean); - - for (size_t j = 0; j < input.size(); j++) //lenght of sequence - { - input[j][i] = input[j][i] / mean; - } - } -} - -void help::normalizeZeroOne(vtr3<double> &input, double max) -{ - for (size_t i = 0; i < input.size(); i++) - { - for (size_t j = 0; j < input[i].size(); j++) - { - for (size_t k = 0; k < input[i][j].size(); k++) - { - input[i][j][k] /= max; - } - } - } -} - -vtr2<double> help::normalize(vtr2<double> const &input, double coef) -{ - vtr2<double> output(input.size()); - - for (size_t i = 0; i < input.size(); i++) //dims - { - vtr<double> el(input[i].size()); - for (size_t j = 0; j < input[i].size(); j++) //lenght of sequence - { - el[j] = input[i][j] * coef; - } - output[i] = el; - } - - return output; -} - vtr3<double> help::separateSequence(vtr3<double> const &input, int size) { vtr3<double> output; @@ -346,12 +245,6 @@ vtr3<double> help::separateSequenceOne(vtr2<double> const &input) return output; } -//void help::reduce(vtr3<double> &input, size_t skip) -//void help::paa(vtr3<double> &input, size_t ratio) -//void help::sax(vtr3<double> &input, size_t numClasses) -//void help::smooth(vtr3<double> &input, size_t width) - - vtr2<double> help::convert_arrd(double* const &series, size_t len) { vtr2<double> out(len); @@ -391,28 +284,6 @@ vtr2<double> help::convert_arr3d(double* const &input, size_t len, size_t dims) return out; } -//vtr<coords> help::convert_toCoords(result_path const &warping) -//{ -// vtr<coords> pathCoords(warping.path.size()); -// -// int row = -1, col = -1; -// for (size_t i = 0; i < warping.path.size(); i++) -// { -// if (warping.path[i] == 'M') { -// row++; -// col++; -// } -// else if (warping.path[i] == 'L') -// col++; -// else if (warping.path[i] == 'U') -// row++; -// -// pathCoords[i] = coords(row + 1, col + 1); //x,y(i,j) -// } -// -// return pathCoords; -//} - void help::sortFollow(vtr<double> &lead, vtr<int> &follow, bool reversed) { for (size_t i = 0; i < lead.size() - 1; i++) @@ -451,268 +322,7 @@ vtr2<T> help::vtr_degrade(vtr3<T> const &input) return v; } -template <typename T> -void help::interpolate(vtr3<T> &input) -{ - int maxLen = -1; - - for (auto &&s : input) - { - if ((int)s.size() > maxLen) - maxLen = (int)s.size(); - } - - for (size_t i = 0; i < input.size(); i++) - { - if (input[i].size() == 1) - { - vtr2<T> row(maxLen); - fill(row.begin(), row.end(), input[i][0]); - input[i] = row; - } - - int diff = maxLen - (int)input[i].size(); - while (diff > 0) - { - vtr2<T> row; - - int c = 0; - for (size_t j = 0; j < input[i].size() && diff > 0; j++) - { - vtr<T> point; - - if (j % 2 == 1) - { - for (size_t k = 0; k < input[i][j].size(); k++) - { - double tmp = (input[i][c - 1][k] + input[i][c][k]) / 2.0; - point.push_back(tmp); - } - - row.push_back(point); - diff--; - } - else - { - row.push_back(input[i][c]); - c++; - } - - } - row.insert(row.end(), input[i].begin() + c, input[i].end()); - input[i] = row; - } - } -} -template void help::interpolate<double>(vtr3<double> &input); - -template <typename T> -void help::interpolate2(vtr3<double> &input) -{ - int maxLen = -1; - - for (auto &&s : input) - { - if ((int)s.size() > maxLen) - maxLen = (int)s.size(); - } - - for (auto &&i : input) - { - int diff = maxLen - (int)i.size(); - - while (diff > 0) - { - int c = 0; - vtr2<T> row; - for (size_t k = 0; k < i.size() - 1 && diff > 0; k++) - { - vtr<T> el; - - if (k % 2 == 1) - { - for (size_t j = 0; j <i[k].size(); j++) - { - T tmp = (i[c - 1][j] + i[c][j]) / 2.0; - el.push_back(tmp); - } - - row.push_back(el); - diff--; - } - else - { - row.push_back(i[c]); - c++; - } - - } - row.insert(row.end(), i.begin() + c, i.end()); - i = row; - } - } -} - -template <typename T> -void help::paa(vtr3<T> &input, size_t ratio) -{ - vtr3<T> output(input.size()); - - for (size_t i = 0; i < input.size(); i++) - { - vtr2<T> s; - for (size_t j = 0; j < input[i].size(); j += ratio)// sequence - { - vtr<T> dim(input[i][j].size()); - const size_t end = j + ratio >= input[i].size() ? input[i].size() : j + ratio; - for (size_t k = 0; k < input[i][j].size(); k++) //all dims - { - double sum = 0; - int merged = 0; - for (size_t l = j; l < end; l++) //sum individual groups of dims - { - sum += input[i][l][k]; - merged++; - } - dim[k] = (T)(sum / merged); - } - s.push_back(dim); - } - output[i] = s; - } - - input = output; -} -template void help::paa<double>(vtr3<double> &input, size_t ratio); -template void help::paa<int>(vtr3<int> &input, size_t ratio); - -void help::sax(vtr3<double> &input, size_t numClasses) -{ - double max = constant::MIN_double; - double min = constant::MAX_double; - for (auto &&i : input) - { - double tmpMax = help::vtr_findMax(i); - if (tmpMax > max) - max = tmpMax; - - double tmpMin = help::vtr_findMin(i); - if (tmpMin < min) - min = tmpMin; - } - - double step = (max - min) / static_cast<double>(numClasses); - - for (auto &&i : input) - { - for (auto &&j : i) - { - for (auto &&k : j) - { - int c = 1; - double stepCount = min; - while (stepCount < max + step) - { - if (k <= min + c * step) - { - k = min + (c - 1) * step + step / 2; - break; - } - stepCount += step; - c++; - } - } - } - } -} - -//Returns shortened input sequence by skiping some of their elements. -template <typename T> -void help::reduce(vtr3<T> &input, size_t skip) -{ - for (auto &&i : input) - { - vtr2<T> row; - for (size_t j = skip - 1; j < i.size(); j += skip) - { - row.push_back(i[j]); - } - i = row; - } -} -template void help::reduce<double>(vtr3<double> &input, size_t skip); -template void help::reduce<int>(vtr3<int> &input, size_t skip); - -template <typename T> -void help::prolong(vtr3<T> &input, size_t times) -{ - for (size_t i = 0; i < input.size(); i++) //every sequence - { - size_t dims = (int)input[0][0].size(); - - for (size_t j = 0; j < times; j++) //times to prolong - { - size_t alloc = 2 * input[i].size() - 1; - - int counter = 0; - vtr2<T> row(alloc); - for (size_t k = 0; k < 2 * input[i].size() - 1; k++) - { - vtr<T> point(dims); - for (size_t l = 0; l < dims; l++) - { - if (k % 2 == 1) - point[l] = (input[i][counter - 1][l] + input[i][counter][l]) / 2; - else - { - point[l] = input[i][counter][l]; - } - } - if (k % 2 != 1) - counter++; - - row[k] = point; - } - input[i] = row; - } - } -} -template void help::prolong<double>(vtr3<double> &input, size_t times); -template void help::prolong<int>(vtr3<int> &input, size_t times); - -//Returns smoothed sekvence by moving window average. -void help::smooth(vtr3<double> &input, size_t width) -{ - vtr3<double> output(input.size()); - - for (size_t i = 0; i < input.size(); i++) - { - const int dims = (int)input[0][0].size(); - - vtr2<double> s; - for (size_t j = 0; j < width - 1; j++) - { - s.push_back(input[i][j]); - } - - for (size_t j = 0; j < input[i].size() - width + 1; j++)// sequence - { - vtr<double> sums(dims); - for (size_t k = 0; k < width; k++) //all dims - { - for (int l = 0; l < dims; l++) - { - sums[l] += static_cast<double>(input[i][j + k][l] / width); - } - } - s.push_back(sums); - } - output[i] = s; - } - - input = output; -} template<typename T> vtr3<T> help::alterStructure(vtr3<T> const &matrix) @@ -757,4 +367,26 @@ template vtr3<int> help::alterStructure(vtr3<int> const &matrix); // } // } // } +//} + +//vtr<coords> help::convert_toCoords(result_path const &warping) +//{ +// vtr<coords> pathCoords(warping.path.size()); +// +// int row = -1, col = -1; +// for (size_t i = 0; i < warping.path.size(); i++) +// { +// if (warping.path[i] == 'M') { +// row++; +// col++; +// } +// else if (warping.path[i] == 'L') +// col++; +// else if (warping.path[i] == 'U') +// row++; +// +// pathCoords[i] = coords(row + 1, col + 1); //x,y(i,j) +// } +// +// return pathCoords; //} \ No newline at end of file diff --git a/SequenceComparison/help.h b/SequenceComparison/help.h index 46449288d5e9098169d2b24a4259fca08d15c7af..fca6bb8e4c5316530e6f057b9035e51b19cbbd6d 100644 --- a/SequenceComparison/help.h +++ b/SequenceComparison/help.h @@ -14,7 +14,6 @@ public: //Checks and fixed string if it contains BOM bytes. static void correctBomLine(std::string &s); - //trim functions static void trimLeft(std::string &s, std::string const &delimiters); static void trimRight(std::string &s, std::string const &delimiters); static void trim(std::string &s, std::string const &delimiters); @@ -29,81 +28,22 @@ public: static int random_int(int min, int max); static double random_real(int min, int max); - static vtr2<double> random_sequence(int len, int dims, int min, int max); - /*NOT USED - static vtr2<double> convertToDouble(vtr2<std::string> const &strInput); - static vtr<double> convertToDouble(vtr<std::string> const &strInput);*/ - static vtr2<double> convert_arrd(double* const &series, size_t len); static vtr2<double> convert_arr2d(double* const &series, size_t len, size_t dims); static vtr2<double> convert_arr3d(double* const &series, size_t len, size_t dims); - //Normalize input sequence. - static void normalizeMany(vtr3<double> &input); - //Normalize 1 input sequence. - static void normalize(vtr2<double> &input); - static vtr2<double> normalize(vtr2<double> const &input, double coef); - //Retruns normalized sequence between <0,1>. - static void normalizeZeroOne(vtr3<double> &input, double max); - //Returns piecewise aggregate approximation of sequence. - //init - template<class T> - static vtr<T> vtr_init(size_t size); - - template<class T> - static vtr2<T> vtr_init(size_t size1, size_t size2); - - template<class T> - static void vtr_init(vtr2<T> &m, size_t size1, size_t size2, T value); - - template<class T> - static vtr3<T> vtr_init(size_t size1, size_t size2, size_t size3); - - template<class T> - static vtr3<T> vtr_initPartial(size_t size1, size_t size2); - - template <typename T> - static T vtr_findMax(vtr2<T> const &input); - - template <typename T> static T vtr_findMax(vtr3<T> const &input); - - template <typename T> - static T vtr_findMin(vtr2<T> const &input); - - template <typename T> - static vtr2<T> vtr_degrade(vtr3<T> const &input); - + template<class T> static vtr<T> vtr_init(size_t size); + template<class T> static vtr2<T> vtr_init(size_t size1, size_t size2); + template<class T> static void vtr_init(vtr2<T> &m, size_t size1, size_t size2, T value); + template<class T> static vtr3<T> vtr_init(size_t size1, size_t size2, size_t size3); + template<class T> static vtr3<T> vtr_initPartial(size_t size1, size_t size2); + + template <typename T> static vtr2<T> vtr_degrade(vtr3<T> const &input); + template <typename T> static vtr3<T> alterStructure(vtr3<T> const &matrix); static void sortFollow(vtr<double> &lead, vtr<int> &follow, bool reversed); - - template <typename T> - static vtr3<T> alterStructure(vtr3<T> const &matrix); - - //static vtr<coords> convert_toCoords(result_path const &warping); - - //Returns input sequence interpolated to same length. - template <typename T> - static void interpolate(vtr3<T> &input); - - template <typename T> - static void interpolate2(vtr3<double> &input); - - template <typename T> - static void paa(vtr3<T> &input, size_t ratio); - - static void sax(vtr3<double> &input, size_t numClasses); - - //Returns shortened input sequence by skiping some of their elements. - template <typename T> - static void reduce(vtr3<T> &input, size_t skip); - - template <typename T> - static void prolong(vtr3<T> &input, size_t times); - - //Returns smoothed sekvence by moving window average. - static void smooth(vtr3<double> &input, size_t width); }; #endif //HELP_H \ No newline at end of file diff --git a/SequenceComparison/lcss.cpp b/SequenceComparison/lcss.cpp index 7d08729d88a0890ac74505a35dd5c1dcefff720c..7a28d6da7d8f2702f7fdd51bf3352a98d11ac2d7 100644 --- a/SequenceComparison/lcss.cpp +++ b/SequenceComparison/lcss.cpp @@ -9,7 +9,7 @@ using namespace std; -vtr<double> lcss::main(input_method const &input, input_info const &info, parameter const ¶ms) +result_dtw lcss::main(input_method const &input, input_info const &info, parameter const ¶ms) { if ((int)((input.A.size() * input.B.size()) / 131072) > params.ram) //131072 to convert bytes to MB { @@ -28,10 +28,10 @@ vtr<double> lcss::main(input_method const &input, input_info const &info, parame if (params.isRatioReversed()) { for (size_t i = 0; i < result.score.size(); i++) - result.score[params.scoreType - 1] = 1 - result.score[params.scoreType - 1]; + result.score[params.scoreType] = 1 - result.score[params.scoreType]; } - return result.score; + return result; } //double lcss::main2(input_method const &input, parameter const ¶ms) @@ -39,9 +39,9 @@ vtr<double> lcss::main(input_method const &input, input_info const &info, parame // auto result = alignment(input, params); // // if (params.isRatioReversed()) // -reversed switch -// return 1 - result[params.scoreType - 1]; +// return 1 - result[params.scoreType]; // else -// return result[params.scoreType - 1]; +// return result[params.scoreType]; //} result_dtw lcss::alignment(input_method const &input, parameter const ¶ms) @@ -63,8 +63,8 @@ result_dtw lcss::alignment(input_method const &input, parameter const ¶ms) result_dtw result; if (params.drawOut.size() > 0) { - result.matrix_acc = draw::matrix(m, warping, ranges, params); - result.matrix_noacc = draw::matrix(lcss::matrix_noaccumulation(input, distance, params), warping, ranges, params); + result.matrix_acc = draw::draw_combine(m, warping, params); + result.matrix_noacc = draw::draw_combine(lcss::matrix_noaccumulation(input, distance, params), warping, params); } result.score.push_back(warping[0].scoreRaw); diff --git a/SequenceComparison/lcss.h b/SequenceComparison/lcss.h index 6cdde4f9f57a543af0e047c7d3e76beca294a178..b9cc36f24b96b19d6d9f9fcf24034b02758b6613 100644 --- a/SequenceComparison/lcss.h +++ b/SequenceComparison/lcss.h @@ -10,20 +10,12 @@ typedef double(*DISTANCE_LCSS)(vtr<double> const &A, vtr<double> const &B, int i class lcss { public: - - //Returns result of lcss function. - //lcss function should always be called by this method. - static vtr<double> main(input_method const &input, input_info const &info, parameter const ¶ms); - //Returns alignment similarity. + static result_dtw main(input_method const &input, input_info const &info, parameter const ¶ms); static result_dtw alignment(input_method const &input, parameter const ¶ms); - //Returns 'distance matrix' for 2 input sequence. static vtr2<node> matrix(input_method const &input, DISTANCE_LCSS d, parameter const ¶ms); static vtr2<node> matrix_noaccumulation(input_method const &input, DISTANCE_LCSS d, parameter const ¶ms); - //Returns 'warping path' generated form distance matrix. static result_path get_warping(vtr2<node> const &m, input_method const &input); - //Log - //static std::string Log(result const &, parameter const &); }; #endif // LCSS_H \ No newline at end of file diff --git a/SequenceComparison/mains.cpp b/SequenceComparison/mains.cpp index b7ffa710f2407b678c3d1d5e877a3c7a1c1faf10..8a4967ea5fdd875650cc5a9b5ab3619a9a13d9ef 100644 --- a/SequenceComparison/mains.cpp +++ b/SequenceComparison/mains.cpp @@ -13,10 +13,61 @@ #include "help.h" #include "calcul.h" #include "draw.h" +#include "preprocess.h" using namespace std; -void mains::main_entry(vtr<string> const &args) +void mains::master(vtr<string> const &args) +{ + /*TContent<int, int, int, int> stru(0, 0, 1, 2); + stru.*/ + + auto params = parseParameters(args); + master_run(params); +} + +vtr<result_operation> mains::master_run(vtr<parameter> const ¶ms) +{ + vtr<result_operation> results; + for (auto p : params) + results.push_back(master_run(p)); + + for (size_t i = 0; i < results.size(); i++) + { + printResult(results[i], params[i]); + } + + return results; +} + +result_operation mains::master_run(parameter const ¶m) +{ + result_time times; + + auto begin = chrono::steady_clock::now(); + auto data = parseData(param); + times.parsing = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); + + begin = chrono::steady_clock::now(); + preprocess(data, param); + times.preprocesing = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); + + if (param.isDebugInfo()) { + cout << print::input(data.input, 30) << endl; // print for debug comment or delete if not needed + cout << print::input(data.query, 30) << endl; // print for debug comment or delete if not needed + } + + begin = chrono::steady_clock::now(); + auto result = run(data, param); + times.opeartion = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); + + result.time = times; + writeResult(data, result, times, param); + + return result; +} + +vtr<parameter> mains::parseParameters(vtr<string> const &args) { //script support vector<string> scripts; @@ -28,42 +79,39 @@ void mains::main_entry(vtr<string> const &args) break; } } - - result_operation result; - auto params = parameter::setParameters(args); + auto param = parameter::setParameters(args); + vtr<parameter> params(scripts.size()); + if (scripts.size() > 0) { for (size_t i = 0; i < scripts.size(); i++) //secondary switches applyed to all script lines ...for more robust behaviour { - auto argsAll = help::split(scripts[i], " \t"); - parameter::applyParameter(argsAll, args); - params = parameter::setParameters(argsAll); - result = main_logic(params); + auto lineArgs = help::split(scripts[i], " \t"); + parameter::applyParameter(lineArgs, args); + params[i] = parameter::setParameters(lineArgs); } } - else - result = main_logic(params); + params.push_back(param); + + return params; } -result_operation mains::main_logic(parameter const ¶ms) -{ - //INPUT DATA PARSING +input_data mains::parseData(parameter const ¶ms) +{ input_data data; - result_time times; - auto begin = chrono::steady_clock::now(); try { data.files.input = parser::getAllFileNames(params.inPath); //file path of all input files data.files.query = parser::getAllFileNames(params.inQuery); //file path of all input files data.files.keyInput = parser::getAllFileNames(params.inKeyInput); //file path of all input files data.files.keyQuery = parser::getAllFileNames(params.inKeyQuery); //file path of all input files data.files.sort(); - + data.input = parser::readData<double>(data.files.input); //parsing if (data.input.size() < 1) throw runtime_error("problem occured when loading input data"); - + if (params.distance == 4) data.keyInput = parser::readData<int>(data.files.keyInput); @@ -77,8 +125,6 @@ result_operation mains::main_logic(parameter const ¶ms) data.keyInput = parser::readData<int>(data.files.keyQuery); } - times.parsing = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); - if (params.isClusters()) //parsing clusters file { auto clusterTmp = parser::parseClusters(data.files.query, params.inClusterPath); @@ -106,18 +152,11 @@ result_operation mains::main_logic(parameter const ¶ms) cout << print::inputStats(data.query, 30); } - begin = chrono::steady_clock::now(); - preprocesing(data, params); - times.preprocesing = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); - - if (params.isDebugInfo()) { - cout << print::input(data.input, 30) << endl; // print for debug comment or delete if not needed - cout << print::input(data.query, 30) << endl; // print for debug comment or delete if not needed - } - - //RUN METHOD - begin = chrono::steady_clock::now(); + return data; +} +result_operation mains::run(input_data const &data, parameter const ¶ms) +{ result_operation result; if (params.operation < 2 || params.operation == 3 || params.operation == 5) { @@ -127,12 +166,7 @@ result_operation mains::main_logic(parameter const ¶ms) else result = operation::main(data, params); - times.opeartion = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); - - - //PRINT REUSULT - mains::printResult(data, result, times, params); - + return result; } @@ -156,7 +190,7 @@ result_operation mains::extern_logic(vtr3<double> const &input, vtr<string> cons data.files.input = inputFiles; begin = chrono::steady_clock::now(); - preprocesing(data, params); + preprocess(data, params); times.preprocesing = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); if (params.isDebugInfo()) @@ -182,78 +216,82 @@ result_operation mains::extern_logic(vtr3<double> const &input, vtr<string> cons return result; } -void mains::preprocesing(input_data &data, parameter const& params) +void mains::preprocess(input_data &data, parameter const& params) { //DATA PREPROCESSING if (params.isReduce()) { - help::reduce<double>(data.input, params.reduce); - help::reduce<double>(data.query, params.reduce); - help::reduce<int>(data.keyInput, params.reduce); - help::reduce<int>(data.keyQuery, params.reduce); + preprocess::reduce<double>(data.input, params.reduce); + preprocess::reduce<double>(data.query, params.reduce); + preprocess::reduce<int>(data.keyInput, params.reduce); + preprocess::reduce<int>(data.keyQuery, params.reduce); } if (params.isPaa()) { - help::paa<double>(data.input, params.paa); - help::paa<double>(data.query, params.paa); - help::paa<int>(data.keyInput, params.paa); - help::paa<int>(data.keyQuery, params.paa); + preprocess::paa<double>(data.input, params.paa); + preprocess::paa<double>(data.query, params.paa); + preprocess::paa<int>(data.keyInput, params.paa); + preprocess::paa<int>(data.keyQuery, params.paa); } if (params.isSax()) { - help::sax(data.input, params.sax); - help::sax(data.query, params.sax); + preprocess::sax(data.input, params.sax); + preprocess::sax(data.query, params.sax); } if (params.prolong > 0) { - help::prolong<double>(data.input, params.prolong); - help::prolong<double>(data.query, params.prolong); - help::prolong<int>(data.keyInput, params.prolong); - help::prolong<int>(data.keyQuery, params.prolong); + preprocess::prolong<double>(data.input, params.prolong); + preprocess::prolong<double>(data.query, params.prolong); + preprocess::prolong<int>(data.keyInput, params.prolong); + preprocess::prolong<int>(data.keyQuery, params.prolong); } //if (params.isZNormalization()) // help::normalizeMany(input); if (params.isInterpolate()) { - help::interpolate(data.input); - help::interpolate(data.query); + preprocess::interpolate(data.input); + preprocess::interpolate(data.query); } if (params.isNormalizeZeroOne()) { - help::normalizeZeroOne(data.input, params.normalize); - help::normalizeZeroOne(data.query, params.normalize); + preprocess::normalizeZeroOne(data.input, params.normalize); + preprocess::normalizeZeroOne(data.query, params.normalize); } if (params.isSmooth()) { - help::smooth(data.input, params.smooth); - help::smooth(data.query, params.smooth); + preprocess::smooth(data.input, params.smooth); + preprocess::smooth(data.query, params.smooth); } } -void mains::printResult(input_data const &data, result_operation &result, result_time ×, parameter const ¶ms) +void mains::printResult(result_operation &result, parameter const ¶ms) { - if (!result.scoreMethod.empty()) - cout << print::vector(result.scoreMethod); - - if(!result.matrixSimilarity.empty()) - result.matrixSimilarity = help::alterStructure(result.matrixSimilarity); - //if (!result.matrixCluster.empty()) - // result.matrixCluster = help::alterStructure(result.matrixCluster); + if (!result.dtw.score.empty()) + cout << print::vector(result.dtw.score); if (params.isPrintOutput()) { if (params.isOmp()) - cout << print::matrix(result.matrixSimilarity[params.scoreType - 1], params); + cout << print::matrix(result.matrixSimilarity[params.scoreType], params); if (result.matrixCluster.size() > 0) - cout << print::matrix(result.matrixCluster[params.scoreType - 1], params); + cout << print::matrix(result.matrixCluster[params.scoreType], params); } if (params.operation > 2) cout << print::scores_clustering(result, params.precision); + if (params.isTime()) + cout << print::timeMeasures(result.time, params); + +} + +void mains::writeResult(input_data const &data, result_operation &result, result_time ×, parameter const ¶ms) +{ + /*if (!result.matrixSimilarity.empty()) + result.matrixSimilarity = help::alterStructure(result.matrixSimilarity);*/ + //WRITE LOG FILES - auto begin = chrono::steady_clock::now(); if (params.isWriteOutput()) { - print::write(print::matrix(result.matrixSimilarity[params.scoreType - 1], params), params.outputPath + ".matrix", false); + print::write(print::matrix(result.matrixSimilarity[params.scoreType], params), params.outputPath + ".matrix", false); print::write(print::scores_clustering(result, params.precision), params.outputPath + ".score", false); } @@ -266,50 +304,9 @@ void mains::printResult(input_data const &data, result_operation &result, result } if (params.isGdf()) - print::write(print::gdf(data.files.input, data.input, result.matrixSimilarity[params.scoreType - 1], data.clusters), params.outputPath + ".gdf", false); - - times.write = chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - begin).count(); - - if (params.isTime()) - cout << print::timeMeasures(times, params); + print::write(print::gdf(data.files.input, data.input, result.matrixSimilarity[params.scoreType], data.clusters), params.outputPath + ".gdf", false); if (params.isWriteOutput()) { print::write(print::timeMeasures(times, params), params.outputPath + ".time", false); } -} - -//for (size_t u = 0; u < 100; u++) -//{ -// for (size_t i = 0; i < 999; i++) -// { -// int min = 1; -// int max = 999; -// int cc = 0; -// map<int, int> isIn; -// for (int j = 0; j < 999; j++) -// { -// std::uniform_int_distribution<int> dis(min, max); -// while (true) { -// int tmp = dis(gen); - -// if (isIn.count(tmp) > 0) -// { -// if (tmp == max) -// while (isIn.count(max) > 0) { -// max--; -// } -// } -// else { -// isIn[tmp] = tmp; -// mx[j][i] = tmp; -// cc++; -// break; -// } -// } -// } -// //cout << cc << endl; -// } - -// auto s = calcul::scoreMap(mx, c); -// cout << s[999] << endl; -//} +} \ No newline at end of file diff --git a/SequenceComparison/mains.h b/SequenceComparison/mains.h index 6e71a550afd58e0b873dbd2e70f2f5ac981a3766..d3173983c41331ac340e140b8925d76ed11d9aae 100644 --- a/SequenceComparison/mains.h +++ b/SequenceComparison/mains.h @@ -7,16 +7,18 @@ class mains { public: - static void main_entry(vtr<std::string> const &args); + static void master(vtr<string> const &args); + static result_operation master_run(parameter const ¶ms); + static vtr<result_operation> master_run(vtr<parameter> const ¶ms); + static vtr<parameter> parseParameters(vtr<std::string> const &args); + static input_data parseData(parameter const ¶ms); + static void preprocess(input_data &data, parameter const ¶ms); + static result_operation run(input_data const &data, parameter const ¶ms); - static result_operation main_logic(parameter const ¶ms); + static void printResult(result_operation &result, parameter const ¶ms); + static void writeResult(input_data const &data, result_operation &result, result_time ×, parameter const ¶ms); static result_operation extern_logic(vtr3<double> const &input, vtr<std::string> const &args); - - static void preprocesing(input_data &data, parameter const ¶ms); - - static void printResult(input_data const &data, result_operation &result, result_time ×, parameter const ¶ms); - }; #endif //MAINS_H \ No newline at end of file diff --git a/SequenceComparison/matrix.cpp b/SequenceComparison/matrix.cpp index 2e8aec689104b776ba009fc3b9eec68e837ac382..bf2e49c69888ebb08236636fb6da8e46ca1cd133 100644 --- a/SequenceComparison/matrix.cpp +++ b/SequenceComparison/matrix.cpp @@ -11,8 +11,7 @@ using namespace std; #undef min #undef max -matrix::matrix() -{} +matrix::matrix(){} matrix::matrix(input_method const &p_input, input_info const &p_info, parameter const &p_params) : input(p_input), info(p_info), params(p_params) {} @@ -48,10 +47,11 @@ void matrix::build() } const int w = (int)(lenB * params.w); + const double coef = lenB / (double)lenA; for (int i = 1; i < lenA + 1; i++) //row = y { - const size_t start = max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const size_t end = min(lenB + 1, (int)(ceil(i * lenB / (double)lenA) + 1) + w); + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (size_t j = start; j < end; j++) //col = x { if (params.distance == 3) @@ -70,10 +70,11 @@ void matrix::build_accumulate() int lenB = (int)m[0].size(); const int w = (int)(lenB * params.w); + const double coef = lenB / (double)lenA; for (int i = 1; i < lenA; i++) //row = y { - const size_t start = max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const size_t end = min(lenB, (int)(ceil(i * lenB / (double)lenA)) + w); + const size_t start = calcul::dtw_wpassStart(i, w, coef); + const size_t end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (size_t j = start; j < end; j++) //col = x { m[i][j] += std::min({ m[i - 1][j - 1], m[i - 1][j], m[i][j - 1] }); @@ -81,18 +82,19 @@ void matrix::build_accumulate() } } -vtr<coords> matrix::get_minimums() +vtr<coord> matrix::get_minimums() { - vtr<coords> minims; + vtr<coord> minims; int lenA = (int)m.size(); int lenB = (int)m[0].size(); const int w = (int)(lenB * params.w); + const double coef = lenB / (double)lenA; for (int i = 1; i < lenA; i++) //row = y { - const int start = max(1, (int)(ceil((i - 1) * (lenB / (double)lenA + 0.0000000001)) - w)); - const int end = min(lenB, (int)(ceil(i * lenB / (double)lenA)) + w); + const int start = calcul::dtw_wpassStart(i, w, coef); + const int end = calcul::dtw_wpassEnd(i, w, coef, lenB); for (int j = start; j < end; j++) //col = x { double current = m[i][j]; @@ -101,7 +103,7 @@ vtr<coords> matrix::get_minimums() m[i - 1][j] > current && m[i - 1][j + 1] > current && m[i][j - 1] > current && m[i][j + 1] > current && m[i + 1][j - 1] > current && m[i + 1][j] > current) { - minims.push_back(coords(i, j)); + minims.push_back(coord(i, j)); } } } @@ -117,7 +119,7 @@ i j params */ -result_path matrix::get_warping(coords &coord) +result_path matrix::get_warping(coord &coord) { result_path wp; double sizeA = coord.row; @@ -198,10 +200,10 @@ vtr<result_path> matrix::get_warpings(vtr<result_path> const &paths) sumary: parameters: */ -coords matrix::get_relaxedEnds() +coord matrix::get_relaxedEnds() { double min = constant::MAX_double; - coords coordMin; + coord coordMin; int lenA = (int)m.size() - 1; int lenB = (int)m[0].size() - 1; diff --git a/SequenceComparison/matrix.h b/SequenceComparison/matrix.h index 66a1a85305e060bc840ffd747939959cd40f2007..ac5bd2fb843d080c752a68cfd8f0b6309e831ca1 100644 --- a/SequenceComparison/matrix.h +++ b/SequenceComparison/matrix.h @@ -18,10 +18,10 @@ public: void build(); void build_accumulate(); - vtr<coords> get_minimums(); - result_path get_warping(coords &coord); + vtr<coord> get_minimums(); + result_path get_warping(coord &coord); vtr<result_path> get_warpings(vtr<result_path> const &paths); - coords get_relaxedEnds(); + coord get_relaxedEnds(); }; #endif //MATRIX_H diff --git a/SequenceComparison/operation.cpp b/SequenceComparison/operation.cpp index 321ba184ec89a042e5d174d70be9eb1b5f5b8817..7ba94443233de2ae4930c4434938cb657e08d43c 100644 --- a/SequenceComparison/operation.cpp +++ b/SequenceComparison/operation.cpp @@ -10,13 +10,14 @@ #include "print.h" #include "pdtw.h" #include "tree.h" +#include "draw.h" using namespace std; result_operation operation::main(input_data_single const &data, parameter const ¶ms) { METHOD f1 = dtw::main; - //FUNCM f2 = pdtw::main; + PDTW f2 = pdtw::main; if (params.method == "lcss") f1 = lcss::main; @@ -34,12 +35,18 @@ result_operation operation::main(input_data_single const &data, parameter const case eOperation::op_oneInput: //op 3 result = clustering(data, params, f1); break; + case eOperation::op_experiment: + result = experiment(data, params, f1, f2); + break; default: - cout << ("error: Operation not found"); + cout << ("error: Operation not found!"); exit(0); break; } + if (result.matrixSimilarity.size() > 0) + result.matrixSimilarity = help::alterStructure(result.matrixSimilarity); + return result; } @@ -67,9 +74,65 @@ result_operation operation::main(input_data const &data, parameter const ¶ms break; } + if(result.matrixSimilarity.size() > 0) + result.matrixSimilarity = help::alterStructure(result.matrixSimilarity); + return result; } +result_operation operation::experiment(input_data_single const &data, parameter const ¶ms, METHOD f1, PDTW f2) +{ + auto result = params.shift ? dtw_shift(data, params, f1) : dtw(data, params, f1); + + auto getPaths = [&w = result.dtw.warpings](){ + vtr3<double> paths; + for (size_t i = 0; i < w.size(); i++){ + vtr2<double> seq; + for (size_t j = 1; j < w[i].pathCoords.size(); j++) { + if(w[i].pathCoords[j].row - w[i].pathCoords[j - 1].row == 1 && w[i].pathCoords[j].col - w[i].pathCoords[j - 1].col == 1) + seq.push_back(vtr<double> { 1 });//diag + else if(w[i].pathCoords[j - 1].row - w[i].pathCoords[j].row == 1) + seq.push_back(vtr<double> { 0 });//up + else + seq.push_back(vtr<double> { 2 });//left + } + paths.push_back(seq); + } + return paths; + }; + + auto paths = getPaths(); + auto pdtw = pdtw::main(paths, input_info(0,1), params); + + const float colors[15][3] = {{ 255, 0, 0 }, { 190, 190, 0 }, { 0, 255, 0 }, { 180, 180, 180 }, { 255, 170, 200 }, + { 255, 128, 0 }, { 255, 255, 255 }, { 0, 128, 255 }, { 255, 255, 255 }, { 255, 255, 255 }, + { 255, 255, 0 }, { 0, 0, 255 }, { 0, 255, 255 }, { 255, 0, 255 }, { 0, 155, 155 }}; + + for (auto v : pdtw.clusters) + cout << v.size() << endl; + + auto getIdx = [&c = pdtw.clusters](size_t i) { + for (size_t k = 0; k < c.size(); k++) + for (auto v : c[k]) + if (v == (int)i) { return (int)k % 15; } + return 0; + }; + + for (size_t i = 0; i < result.dtw.warpings.size(); i++) { + int idx = getIdx(i); + for (size_t j = 0; j < result.dtw.warpings[i].pathCoords.size(); j++) { + result.dtw.matrix_noacc.draw_point(result.dtw.warpings[i].pathCoords[j].col - 1, result.dtw.warpings[i].pathCoords[j].row - 1, colors[idx]); + } + } + + result.dtw.matrix_noacc.append(draw::draw_serie(data.input[1], max(calcul::vtr_max(data.input[0]), calcul::vtr_max(data.input[1]))), 'y'); + result.dtw.matrix_noacc = draw::draw_serie(data.input[0], max(calcul::vtr_max(data.input[0]), calcul::vtr_max(data.input[1]))).rotate(90).append(result.dtw.matrix_noacc, 'x'); + result.dtw.matrix_noacc.save(draw::generateName(input_info(0, 1), params.drawOut).c_str()); + + result_operation resultop; + return resultop; +} + result_operation operation::dtw(input_data_single const &data, parameter const ¶ms, METHOD f) { input_method input; @@ -90,7 +153,7 @@ result_operation operation::dtw(input_data_single const &data, parameter const & //} result_operation result; - result.scoreMethod = f(input, info, params); + result.dtw = f(input, info, params); return result; } @@ -98,7 +161,6 @@ result_operation operation::dtw(input_data_single const &data, parameter const & result_operation operation::similarityMatrix(input_data_single const &data, parameter const ¶ms, METHOD f) { vtr3<double> matrix = help::vtr_initPartial<double>(data.input.size(), data.input.size()); - cout << setprecision(params.precision); for (size_t i = 0; i < data.input.size(); i++) { @@ -117,15 +179,15 @@ result_operation operation::similarityMatrix(input_data_single const &data, para info.nameA = data.files.get_inputName(i); info.nameB = data.files.get_inputName(j); - vtr<double> tmp = f(input, info, params); + auto tmp = f(input, info, params); if (params.scoreType < 2) - cout << setw(params.precision + 5) << fixed << tmp[params.scoreType] << " "; + cout << setw(params.precision + 5) << fixed << tmp.score[params.scoreType] << " "; else - cout << setw(params.precision + 3) << fixed << tmp[params.scoreType] << " "; + cout << setw(params.precision + 3) << fixed << tmp.score[params.scoreType] << " "; - matrix[i][j] = tmp; - matrix[j][i] = tmp; + matrix[i][j] = tmp.score; + matrix[j][i] = tmp.score; /*} else break;*/ @@ -167,7 +229,7 @@ result_operation operation::similarityMatrix_omp(input_data_single const &data, input_info info(i, j); - result.matrixSimilarity[i][j] = f(input, info, params); + result.matrixSimilarity[i][j] = f(input, info, params).score; result.matrixSimilarity[j][i] = result.matrixSimilarity[i][j]; } @@ -231,12 +293,12 @@ result_operation operation::similarityMatrix(input_data const &data, parameter c auto tmp = f(input, info, params)/*[params.scoreType]*/; - matrix[i][j] = tmp; + matrix[i][j] = tmp.score; if (params.scoreType < 2) - cout << setw(params.precision + 5) << fixed << tmp[params.scoreType] << " "; + cout << setw(params.precision + 5) << fixed << tmp.score[params.scoreType] << " "; else - cout << setw(params.precision + 3) << fixed << tmp[params.scoreType] << " "; + cout << setw(params.precision + 3) << fixed << tmp.score[params.scoreType] << " "; } cout << endl; } @@ -268,7 +330,7 @@ result_operation operation::similarityMatrix_omp(input_data const &data, paramet input_info info(i,j); auto tmp = f(input, info, params); - matrix[i][j] = tmp; + matrix[i][j] = tmp.score; } result_operation result; @@ -392,7 +454,7 @@ result_operation operation::clustering(input_data const &data, parameter const & if(params.shift) result = similarityMatrix_shift_omp(data, params, f); else - result = similarityMatrix(data, params, f); + result = similarityMatrix_omp(data, params, f); int depth = (int)result.matrixSimilarity[0][0].size(); result.init_clusterMatrix(depth); @@ -459,7 +521,7 @@ result_operation operation::similarityMatrix_shift_omp(input_data_single const & for (size_t j = 0; j <= i; j++) { subInput.input[1] = data.input[j]; - result.matrixSimilarity[i][j] = dtw_shift(subInput, params, f).scoreMethod; + result.matrixSimilarity[i][j] = dtw_shift(subInput, params, f).dtw.score; result.matrixSimilarity[j][i] = result.matrixSimilarity[i][j]; } } @@ -486,19 +548,19 @@ result_operation operation::dtw_shift(input_data_single const &data, parameter c input.B = inputBRotated; input_info info(0, 1); - resultShifted[g] = f(input, info, params); + resultShifted[g] = f(input, info, params).score; } vtr<double> best(resultShifted[0].size(), (!params.scoreReversed) ? constant::MAX_double : 0); for (size_t i = 0; i < resultShifted.size(); i++) { - if (params.scoreReversed ? best[params.scoreType - 1] < resultShifted[i][params.scoreType - 1] : best[params.scoreType - 1] > resultShifted[i][params.scoreType - 1]) + if (params.scoreReversed ? best[params.scoreType] < resultShifted[i][params.scoreType] : best[params.scoreType] > resultShifted[i][params.scoreType]) best = resultShifted[i]; } result_operation result; - result.scoreMethod = best; + result.dtw.score = best; return result; } @@ -518,7 +580,7 @@ result_operation operation::similarityMatrix_shift_omp(input_data const &data, p for (size_t j = 0; j < data.input.size(); j++) { subInput.input[1] = data.input[j]; - simRow[j] = dtw_shift(subInput, params, f).scoreMethod; + simRow[j] = dtw_shift(subInput, params, f).dtw.score; } matrix[i] = simRow; } @@ -553,7 +615,7 @@ result_operation operation::query_omp(input_data const &data, parameter const &p if (distance < best[i]) { - distance = f(input, info, params)[params.scoreType]; + distance = f(input, info, params).score[params.scoreType]; if (distance < best[i]) { @@ -567,7 +629,7 @@ result_operation operation::query_omp(input_data const &data, parameter const &p } else { - double distance = f(input, info, params)[params.scoreType]; + double distance = f(input, info, params).score[params.scoreType]; if (distance < best[i]) { #pragma omp critical diff --git a/SequenceComparison/operation.h b/SequenceComparison/operation.h index f9d2c6b0635d81a157518591af481bb12c791613..d4ed99be766e7f3e77bedfefcaab9f57d471f6d3 100644 --- a/SequenceComparison/operation.h +++ b/SequenceComparison/operation.h @@ -4,17 +4,18 @@ #include "structs.h" #include "parameter.h" -//typedef double(*METHODkk)(input_method const &input, parameter const ¶ms); -typedef vtr<double>(*METHOD)(input_method const &input, input_info const &info, parameter const ¶ms); -typedef double(*PDTW)(vtr3<double> const &input, input_info const &info, parameter const ¶ms); +typedef result_dtw(*METHOD)(input_method const &input, input_info const &info, parameter const ¶ms); +typedef result_pdtw(*PDTW)(vtr3<double> const &input, input_info const &info, parameter const ¶ms); class operation { public: static result_operation main(input_data const &data, parameter const ¶ms); static result_operation main(input_data_single const &data, parameter const ¶ms); - + static result_operation experiment(input_data_single const &data, parameter const ¶ms, METHOD f1, PDTW f2); + static result_operation dtw(input_data_single const &data, parameter const ¶ms, METHOD f); + static result_operation dtw_shift(input_data_single const &data, parameter const ¶ms, METHOD f); //Returns similarity matrix between N sequence. static result_operation similarityMatrix(input_data_single const &data, parameter const ¶ms, METHOD f); @@ -29,15 +30,9 @@ public: static result_operation clustering_shift(input_data_single const &data, parameter const ¶ms, METHOD f); static result_operation clustering_shift(input_data const &data, parameter const ¶ms, METHOD f); - /*static result_operation similarityMatrix_shift(input_data const &data, parameter const ¶ms, METHOD f); - static result_operation similarityMatrix_shift_query(input_data const &data, parameter const ¶ms, METHOD f);*/ - static result_operation similarityMatrix_shift_omp(input_data_single const &data, parameter const ¶ms, METHOD f); static result_operation similarityMatrix_shift_omp(input_data const &data, parameter const ¶ms, METHOD f); - //static vtr<double> similarity_shifts_omp(vtr3<double> const &input, parameter const ¶ms, FUNC f); - static result_operation dtw_shift(input_data_single const &data, parameter const ¶ms, METHOD f); - static result_operation query_omp(input_data const &data, parameter const ¶ms, METHOD f); ////Returns similarity matrix between dimensions of two sequence. diff --git a/SequenceComparison/parameter.cpp b/SequenceComparison/parameter.cpp index 5307c3636298459cfa344cd08699fa6b5f64a3bf..4e8bfde71d7ad8462340a2397de49d3d681345f3 100644 --- a/SequenceComparison/parameter.cpp +++ b/SequenceComparison/parameter.cpp @@ -111,7 +111,7 @@ void parameter::checkParameters(parameter const ¶ms, map<string, string> con throw runtime_error("Operation not found"); } - if(params.scoreType > 5) + if(params.scoreType > 4) throw runtime_error("Invalid argument of parameter score type (-s)."); if (mapSetting.count("-w") > 0 && params.w < 0) @@ -134,7 +134,7 @@ void parameter::checkParameters(parameter const ¶ms, map<string, string> con throw runtime_error("Invalid precision setting"); } - if (mapSetting.count("-s") > 0 && params.scoreType < 1) + if (mapSetting.count("-s") > 0 && params.scoreType < 0) { throw runtime_error("Invalid score type setting"); } @@ -157,7 +157,7 @@ void parameter::checkParameters(parameter const ¶ms, map<string, string> con throw runtime_error("Invalid gdf folder."); }*/ - if ((params.operation > 2 ) && !fs::exists(params.inClusterPath)) + if ((params.operation == 2 || params.operation == 3) && !fs::exists(params.inClusterPath)) { throw runtime_error("Invalid cluster info file path."); } @@ -180,9 +180,9 @@ void parameter::checkParameters(parameter const ¶ms, map<string, string> con void parameter::checkUnknownParameters(vtr<string> const &args) { - vtr<string> switches = {"-m", "-in", "-out", "-gdf", "-reverse", "-pout", "-html", "-w", "-e", "-d", + vtr<string> switches = {"-m", "-in", "-out", "-gdf", "-reverse", "-pout", "-html", "-w", "-wd", "-d", "-e", "-d", "-i", "-n", "-n01", "-omp", "-p", /*"-print",*/ "-smooth", "-gt", "-time", "-paa", "-r", "-s", "-shift", - "-relax", "-sub", "-debug", "-op", "-query", "-recall", "-mem", "-block", "-simd", "-dist", + "-relax", "-sub", "-debug", "-op", "-query", "-recall", "-mem", "-block", "-simd", "-dist", "-clu", "-lb", "-draw", "-pr", "-tcsi", "-tt", "-ta", "-te", "-tl", "-sax", "-in2", "-query2", "-segment", "-local", "-ram", "-exp", "-dmin"}; for (size_t i = 0; i < args.size(); i++) @@ -202,6 +202,10 @@ void parameter::checkUnknownParameters(vtr<string> const &args) } } } + + for (size_t i = 1; i < args.size(); i++) + if(args[i][0] != '-' && args[i - 1][0] != '-') + throw runtime_error("invalid number of arguments for switch"); } parameter parameter::useParameters(map<string, string> &mapSetting) @@ -222,7 +226,7 @@ parameter parameter::useParameters(map<string, string> &mapSetting) params.operation = mapSetting.count("-op") > 0 ? stoi(mapSetting.at("-op")) : params.operation; params.w = mapSetting.count("-w") > 0 ? static_cast<float>(stod(mapSetting.at("-w")) / 100.0) : params.w; params.epsilon = mapSetting.count("-e") > 0 ? static_cast<float>(stod(mapSetting.at("-e"))) : params.epsilon; - params.scoreType = mapSetting.count("-s") > 0 ? std::abs(static_cast<int>(stoi(mapSetting.at("-s")))) : params.scoreType; + params.scoreType = mapSetting.count("-s") > 0 ? std::abs(static_cast<int>(stoi(mapSetting.at("-s")) - 1)) : params.scoreType; params.delta = mapSetting.count("-d") > 0 ? stof(mapSetting.at("-d")) : params.delta; params.normalize = mapSetting.count("-n01") > 0 ? stoi(mapSetting.at("-n01")) : params.normalize; params.omp = mapSetting.count("-omp") > 0 ? stoi(mapSetting.at("-omp")) : params.omp; @@ -262,6 +266,9 @@ parameter parameter::useParameters(map<string, string> &mapSetting) params.relax = mapSetting.count("-relax") > 0 ? stoi(mapSetting.at("-relax")) : 0; params.localAlignment = mapSetting.count("-local") > 0 ? true : false; params.shift = mapSetting.count("-shift") > 0 ? true : false; + params.clusters = mapSetting.count("-clu") > 0 ? stoi(mapSetting.at("-clu")) : params.clusters; + params.wd = mapSetting.count("-wd") > 0 ? stoi(mapSetting.at("-wd")) : params.wd; + params.d = mapSetting.count("-d") > 0 ? stoi(mapSetting.at("-d")) + 1 : params.d; //+ 1 for d 0 meant current cell /*if(mapSetting.count("-relax") > 0){ auto relaxVtr = help::split(mapSetting.at("-relax"), ";"); @@ -417,6 +424,14 @@ bool parameter::isSecondaryInput() const return false; } +bool parameter::isPassFlexible() const +{ + if (wd > 0) + return true; + + return false; +} + void parameter::applyParameter(vtr<string> &args, vtr<string> const &argsPriority) { for (size_t i = 2; i < argsPriority.size(); i++) diff --git a/SequenceComparison/parameter.h b/SequenceComparison/parameter.h index 3f1c82d4cd99b1df2ccf3cea43e87551f1bd4693..d6f66771bafd28d504886fbcbad1b596d10cf961 100644 --- a/SequenceComparison/parameter.h +++ b/SequenceComparison/parameter.h @@ -41,11 +41,14 @@ public: int block; //optimization size block for tilting int segmented; //segment by voting experts algorithm int ram; //max ram used for distance matrix + int clusters; //number of clusters to draw; //result options - std::string matrixDataType; //data type of nodes in distance matrix + std::string matrixDataType; //data type of nodes in distance matrix double w; //warping windows (dtw) + int wd; //flexible pass (t kocyan) + int d; //fllexibility param (t kocyan) double delta; //time dilation - lcss double epsilon; //size difference - lcss double subsequence; //use subsequence alignment @@ -74,13 +77,13 @@ public: //paralelisation options int omp; //set number of threads for omp - int mpi; //setnubmber of nodes ?? for mpi NOT NOT IMPLEMENTED + int mpi; //setnubmber of nodes ?? for mpi... NOT IMPLEMENTED parameter() : znormalize(), interpolate(), printOutput(), timeMeasure(), debugInfo(), gdf(), html(), memoization(), simd(), scoreReversed(), lowerBound(), drawMin(), experiment(), shift(), - method("dtw"), operation(0), distance (1), scoreType(1), precision(3), block(0), segmented(0), ram(5000), matrixDataType("double"), - w(1), delta(100000), epsilon(1), subsequence(0), treshold_csi(0.07), treshold_t(10), treshold_a(10), treshold_e(10), + method("dtw"), operation(0), distance (1), scoreType(0), precision(3), block(0), segmented(0), ram(5000), clusters(1), matrixDataType("double"), + w(1), wd(0), d(1), delta(100000), epsilon(1), subsequence(0), treshold_csi(0.07), treshold_t(10), treshold_a(10), treshold_e(10), treshold_l(3), reduce(0), normalize(-1), paa(1), sax(0), prolong(0), smooth(0), recall(10), relax(0), omp(1), mpi(1) @@ -120,6 +123,7 @@ public: bool isSimd() const; bool isSax() const; bool isSecondaryInput() const; + bool isPassFlexible() const; }; diff --git a/SequenceComparison/parser.cpp b/SequenceComparison/parser.cpp index 5a8338de61c44d4600f67f8489324ceff133a912..7f389c49eb68feca4a9449765cddc8485bb7523a 100644 --- a/SequenceComparison/parser.cpp +++ b/SequenceComparison/parser.cpp @@ -96,6 +96,87 @@ vector<string> parser::readFileByLine(string const &path) return input; } +template <typename T> +vtr2<T> parser::parseDataFile(std::string const &filePath) +{ + vtr2<T> input; + vtr<T> element; + + std::ifstream file(filePath); + if (file.fail()) { + throw std::runtime_error("Could not open file"); + } + + std::string line; + std::getline(file, line); + help::correctBomLine(line); //control for BOM file + + auto splits = help::split(line, ":"); + if (splits.size() < 3) + line = splits[splits.size() - 1]; + else + std::cout << "file: " << filePath << " is fishy!" << std::endl; + + help::trim(line, " \n\r\t"); + + auto lineDims = help::split(line, " "); + for (size_t i = 0; i < lineDims.size(); i++) + element.push_back(ato<T>(lineDims[i].c_str())); + + input.push_back(element); + while (std::getline(file, line)) + { + element.clear(); + if (line == "") + continue; + + auto splits = help::split(line, ":"); + + line = splits[splits.size() - 1]; //format 0 0 0 0 or : 0 0 0 0 (snad) + + help::trim(line, " \n\r\t"); + + auto lineDims = help::split(line, " "); + for (size_t i = 0; i < lineDims.size(); i++) + element.push_back(ato<T>(lineDims[i].c_str())); + + input.push_back(element); + } + + while (input.back().size() == 0) + { + input.pop_back(); + } + + file.close(); + + return input; +} + +template <typename T> +vtr3<T> parser::readData(vtr<std::string> const &files) +{ + vtr3<T> input(files.size()); + + #pragma omp parallel for schedule(dynamic, 1) + for (int i = 0; i < (int)files.size(); i++) + { + try + { + input[i] = parseDataFile<T>(files[i]); + } + catch (const std::exception &e) + { + std::cout << "error: " << e.what() << std::endl; + exit(0); + } + } + + return input; +} +template vtr3<int> parser::readData(vtr<std::string> const &files); +template vtr3<double> parser::readData(vtr<std::string> const &files); + vtr2<int> parser::parseIntByLine(vtr<string> const &input) { vtr2<int> parsed(input.size()); diff --git a/SequenceComparison/parser.h b/SequenceComparison/parser.h index 7d04e789fcb9706cce85b3ba2acaf61696b080b0..f53a63123edc132586a53f78759bfb60271492d7 100644 --- a/SequenceComparison/parser.h +++ b/SequenceComparison/parser.h @@ -14,88 +14,10 @@ public: static vtr<std::string> getFolderFileNames(std::string const &folder); static vtr<std::string> readFileByLine(std::string const & path); - template <typename T> - static vtr2<T> parseDataFile(std::string const &filePath) - { - vtr2<T> input; - vtr<T> element; - - std::ifstream file(filePath); - if (file.fail()) { - throw std::runtime_error("Could not open file"); - } - - std::string line; - std::getline(file, line); - help::correctBomLine(line); //control for BOM file - - auto splits = help::split(line, ":"); - if (splits.size() < 3) - line = splits[splits.size() - 1]; - else - std::cout << "file: " << filePath << " is fishy!" << std::endl; - - help::trim(line, " \n\r\t"); - - auto lineDims = help::split(line, " "); - for (size_t i = 0; i < lineDims.size(); i++) - element.push_back(ato<T>(lineDims[i].c_str())); - - input.push_back(element); - while (std::getline(file, line)) - { - element.clear(); - if (line == "") - continue; - - auto splits = help::split(line, ":"); - - line = splits[splits.size() - 1]; //format 0 0 0 0 or : 0 0 0 0 (snad) - - help::trim(line, " \n\r\t"); - - auto lineDims = help::split(line, " "); - for (size_t i = 0; i < lineDims.size(); i++) - element.push_back(ato<T>(lineDims[i].c_str())); - - input.push_back(element); - } - - while (input.back().size() == 0) - { - input.pop_back(); - } - - file.close(); - - return input; - } + template <typename T> static vtr2<T> parseDataFile(std::string const &filePath); //Returns vector of parsed (double) sequences in folders/files. - template <typename T> - static vtr3<T> readData(vtr<std::string> const &files) - { - vtr3<T> input(files.size()); - - #pragma omp parallel for schedule(dynamic, 1) - for (int i = 0; i < (int)files.size(); i++) - { - try - { - input[i] = parseDataFile<T>(files[i]); - } - catch (const std::exception &e) - { - std::cout << "error: " << e.what() << std::endl; - exit(0); - } - } - - return input; - } - - /*static vtr2<double> parseDataFile_double(std::string const &filePath); - static vtr2<int> parseDataFile_int(std::string const &filePath);*/ + template <typename T> static vtr3<T> readData(vtr<std::string> const &files); //Returns 2D vector of parsed lines. static vtr2<int> parseIntByLine(vtr<std::string> const &input); @@ -103,6 +25,8 @@ public: //Returns map of <sequence id, cluster id>. static input_clusters parseClusters(vtr<std::string> const&filenames, std::string path, std::string delims = ""); + /*static vtr2<double> parseDataFile_double(std::string const &filePath); + static vtr2<int> parseDataFile_int(std::string const &filePath);*/ //Returns vector of raw sequence in folder. //static vtr<std::string> readFolder(std::string const &folder); //Returns vector of raw sequence in file. diff --git a/SequenceComparison/pdtw.cpp b/SequenceComparison/pdtw.cpp index de73c22fa591f53698cea7db35915fed3960bfc3..4afe80688a9dcc265cd8a21ef6a7a446c7159f16 100644 --- a/SequenceComparison/pdtw.cpp +++ b/SequenceComparison/pdtw.cpp @@ -5,173 +5,158 @@ #include "help.h" #include "print.h" #include "calcul.h" +#include "operation.h" using namespace std; -double pdtw::main(vtr3<double> const &input, input_info const &info, parameter const ¶ms) +result_pdtw pdtw::main(vtr3<double> const &input, input_info const &info, parameter const ¶ms) { - auto result = alignment(input, info, params); - - cout << result.scoreNorm<< endl; - - return result.scoreNorm; + if (input.empty()) { + cout << "error: pdtw: no input." << endl; + exit(0); + } + + return alignment(input, info, params); } result_pdtw pdtw::alignment(vtr3<double> const &input, input_info const &info, parameter const ¶ms) { - vtr2<int> clusters(input.size()); - vector<closest> tree; - - for (int i = 0; i < (int)input.size(); i++) - { - clusters[i].push_back(i); - } - - auto dm = createDiagMatrix(input, params); - - for (size_t i = 0; i < input.size() - 1; i++) - { - closest p = getShortestDistance(dm); // shortes distance between clusters ...not seq - result_dtw r; - - if (clusters[p.row].size() > 1 || clusters[p.col].size() > 1) // if at least one cluster exist - { - //int /*bestAR = 0, bestBR = 0,*/ bestAN = 0, bestBN = 0; - //double bestMax = numeric_limits<double>::max(); - //double bestMin = numeric_limits<double>::min(); - - for (size_t j = 0; j < clusters[p.row].size(); j++) - { - for (size_t k = 0; k < clusters[p.col].size(); k++) - { - input_method subInput(input[clusters[p.row][j]], input[clusters[p.col][k]]); - - r = dtw::configure(subInput, info, params); - - //if (bestMax < r) // raw ..min - //{ - // bestMax = r; - // bestAN = (int)j; - // bestBN = (int)k; - //} - //if (bestMin > r) // raw ..min - //{ - // bestMin = r; - // bestAR = j; - // bestBR = k; - //} - } - } - - } - else - { - input_method subInput(input[clusters[p.row][0]], input[clusters[p.col][0]]); - r = dtw::configure(subInput, info, params); - } - - tree.push_back(p); - - clusters[p.row].insert(clusters[p.row].end(), clusters[p.col].begin(), clusters[p.col].end()); - - if (i == clusters.size() - 2) - { - p = getShortestDistance(dm); - } - - revalueAndMark(dm, clusters, p); - } - - double sum = 0; - for (size_t i = 0; i < tree.size(); i++) - { - sum += tree[i].value; - } - - result_pdtw r; - r.scoreNorm = sum / tree.size(); - r.tree = tree; - - return r; //calcul::GetMultiRatioDtw(input, map[0]); + //auto inputLocal = input; + vtr2<int> clusters(input.size()); + vtr2<int> clusters2(input.size()); + + vector<coordv> tree; + + /*vtr<int> plusR(input.size()); + vtr<int> plusC(input.size());*/ + + for (int i = 0; i < (int)input.size(); i++) + clusters[i].push_back(i); + + auto dm = matrix_similarity(input, params); + for (size_t i = 0; i < std::max(input.size() - params.clusters, (size_t)1); i++) + { + //cout << i << endl; + coordv pair = getMinDistance(dm, params.scoreType); // shortes distance between clusters ...not seq + + //auto merged = dtw::tserie_merge(inputLocal[pair.row], inputLocal[pair.col], dm[pair.row][pair.col].warpings[0]); + //inputLocal[pair.row] = merged; + //inputLocal.erase(inputLocal.begin() + pair.col); + + clusters[pair.row].insert(clusters[pair.row].end(), clusters[pair.col].begin(), clusters[pair.col].end()); + clusters2.push_back(clusters[pair.row]); + + tree.push_back(coordv(pair.row, pair.col)); + revalueAndMark(dm, clusters, pair, params.scoreType); + //clusters.erase(clusters.begin() + pair.col); + + //plusR[pair.row]++; + //plusC[pair.col]++; + + /*int sum = 0; + for (size_t i = 0; i <= pair.row; i++) + sum += plusR[i]; + + int sum2 = 0; + for (size_t i = pair.col + 1; i < plusC.size(); i++) + sum2 += plusC[i];*/ + + //if (i == clusters.size() - 2) + // pair = getMinDistance(dm, params.scoreType); + } + + double sum = 0; + for (size_t i = 0; i < tree.size(); i++) + sum += tree[i].value; + + result_pdtw result; + result.scoreNorm = sum / tree.size(); + result.tree = tree; + result.clusters.insert(result.clusters.end(), clusters2.end() - params.clusters, clusters2.end()); + + return result; //calcul::GetMultiRatioDtw(input, map[0]); } -vtr2<double> pdtw::createDiagMatrix(vtr3<double> const &input, parameter const ¶ms) +vtr2<double> pdtw::matrix_similarity(vtr3<double> const &input, parameter const ¶ms) { - vtr2<double> dm; - - for (size_t i = 0; i < input.size(); i++) - { - vector<result_dtw> row(input.size()); - for (size_t j = 0; j < input.size(); j++) - { - if (i == j) - continue; + vtr2<double> m(input.size()); + parameter param = params; + param.localAlignment = false; + param.operation = 1; + param.drawOut = ""; + + for (size_t i = 0; i < input.size(); i++) + { + m[i] = vtr<double>(input.size()); + + for (size_t j = 0; j <= i; j++) + { + if (i == j) { + m[i][j] = constant::MAX_double; + continue; + } input_method subInput(input[i], input[j]); - //row[j] = dtw::alignment(subInput, params)[0]; - } - //dm.push_back(row); - } + + m[i][j] = dtw::configure(subInput, input_info(i, j), param).score[params.scoreType]; + m[j][i] = m[i][j]; + } + } - return dm; + return m; } -closest pdtw::getShortestDistance(vtr2<double> const &dm) +coordv pdtw::getMinDistance(vtr2<double> const &dm, int scoreType) { - int one = 0; - int two = 0; - double value = 0; - - double max = constant::MIN_double; - for (size_t i = 0; i < dm.size(); i++) - { - for (size_t j = i + 1; j < dm[i].size(); j++) - { - if (i == j) - continue; - - if (max < dm[i][j]) - { - max = dm[i][j]; - value = max; - one = (int)i; - two = (int)j; - } - } - } - - closest close; - if (one < two) { - close.row = one; - close.col = two; - close.value = value; - return close; - } - else { - close.row = two; - close.col = one; - close.value = value; - return close; - } + coordv close; + + double min = constant::MAX_double; + for (size_t i = 0; i < dm.size(); i++) + { + for (size_t j = 0; j <= i; j++) + { + if (min > dm[i][j]) + { + min = dm[i][j]; + close.value = min; + close.row = (int)i; + close.col = (int)j; + } + } + } + + if (close.col < close.row) { + int tmp = close.col; + close.col = close.row; + close.row = tmp; + } + return close; } -void pdtw::revalueAndMark(vtr2<double> &dm, vtr2<int> const &clusters, closest const &p) +void pdtw::revalueAndMark(vtr2<double> &dm, vtr2<int> const &clusters, coordv const &pair, int scoreType) { - const size_t keep = p.row; - const size_t discard = p.col; - - for (size_t i = 0; i < dm.size(); i++) - { - if (i == keep) - continue; - - dm[i][keep] = (clusters[keep].size() * dm[keep][i] + clusters[discard].size() * dm[i][discard]) / (clusters[keep].size() + clusters[discard].size()); - dm[keep][i] = dm[i][keep]; - } - - for (size_t i = 0; i < dm.size(); i++) - { - dm[i][discard] = constant::MIN_float; //min jeliko� hledane maximum v dm matici (cim blizzi 1 tim podobnejsi) - dm[discard][i] = constant::MIN_float; - } + const size_t keep = pair.row; + const size_t discard = pair.col; + + for (size_t i = 0; i < dm[keep].size(); i++) + dm[keep][i] = (dm[keep][i] + dm[discard][i]) / 2; + + for (size_t i = 0; i < dm[keep].size(); i++) + dm[i][keep] = (dm[i][keep] + dm[i][discard]) / 2; + + for (size_t i = 0; i < dm[keep].size(); i++) { + dm[discard][i] = constant::MAX_double; + dm[i][discard] = constant::MAX_double; + } + + //dm[keep][i] = dm[i][keep]; + //dm[i][keep].score[scoreType] = (clusters[keep].size() * dm[keep][i].score[scoreType] + clusters[discard].size() * dm[i][discard].score[scoreType]) / (clusters[keep].size() + clusters[discard].size()); + + //dm.erase(dm.begin() + discard, dm.begin() + discard + 1); + //for (size_t i = 0; i < dm.size(); i++) + //{ + // dm[i].erase(dm[i].begin() + discard, dm[i].begin() + discard + 1); + // //dm[i][discard].score[scoreType] = constant::MIN_double; //min jeliko� hledane maximum v dm matici (cim blizzi 1 tim podobnejsi) + // //dm[discard][i].score[scoreType] = constant::MIN_double; + //} } \ No newline at end of file diff --git a/SequenceComparison/pdtw.h b/SequenceComparison/pdtw.h index 5c2ed8b9d57363fd949343ac37165f18d3bbbd74..0cd73a6c0a9fffb9d6d928a082c45fbf66c39fb6 100644 --- a/SequenceComparison/pdtw.h +++ b/SequenceComparison/pdtw.h @@ -7,14 +7,17 @@ class pdtw { public: - static double main(vtr3<double> const &input, input_info const &info, parameter const ¶ms); - static result_pdtw alignment(vtr3<double> const &input, input_info const &info, parameter const ¶ms); - //static double GetEdits(vtr2<double> const &A, vtr2<double> const &B); - static vtr2<double> createDiagMatrix(vtr3<double> const &input, parameter const ¶ms); - //Returns tuple with most similar pair of sequence. (pdtw) - static closest getShortestDistance(vtr2<double> const &dm); - //Returns revalued similarity matrix. (pdtw) - static void revalueAndMark(vtr2<double> &dm, vtr2<int> const &clusters, closest const &p); + static result_pdtw main(vtr3<double> const &input, input_info const &info, parameter const ¶ms); + static result_pdtw alignment(vtr3<double> const &input, input_info const &info, parameter const ¶ms); + + //static double GetEdits(vtr2<double> const &A, vtr2<double> const &B); + static vtr2<double> matrix_similarity(vtr3<double> const &input, parameter const ¶ms); + + //Returns tuple with most similar pair of sequence. (pdtw) + static coordv getMinDistance(vtr2<double> const &dm, int scoreType); + + //Returns revalued similarity matrix. (pdtw) + static void revalueAndMark(vtr2<double> &dm, vtr2<int> const &clusters, coordv const &p, int scoreType); }; #endif //PDTW_H \ No newline at end of file diff --git a/SequenceComparison/preprocess.cpp b/SequenceComparison/preprocess.cpp new file mode 100644 index 0000000000000000000000000000000000000000..12457057264e294142d664d2af1b8262f2a596fd --- /dev/null +++ b/SequenceComparison/preprocess.cpp @@ -0,0 +1,324 @@ +#include "stdafx.h" +#include "preprocess.h" +#include "calcul.h" + +template <typename T> +void preprocess::interpolate(vtr3<T> &input) +{ + int maxLen = -1; + + for (auto &&s : input) + { + if ((int)s.size() > maxLen) + maxLen = (int)s.size(); + } + + for (size_t i = 0; i < input.size(); i++) + { + if (input[i].size() == 1) + { + vtr2<T> row(maxLen); + fill(row.begin(), row.end(), input[i][0]); + input[i] = row; + } + + int diff = maxLen - (int)input[i].size(); + while (diff > 0) + { + vtr2<T> row; + + int c = 0; + for (size_t j = 0; j < input[i].size() && diff > 0; j++) + { + vtr<T> point; + + if (j % 2 == 1) + { + for (size_t k = 0; k < input[i][j].size(); k++) + { + double tmp = (input[i][c - 1][k] + input[i][c][k]) / 2.0; + point.push_back(tmp); + } + + row.push_back(point); + diff--; + } + else + { + row.push_back(input[i][c]); + c++; + } + + } + row.insert(row.end(), input[i].begin() + c, input[i].end()); + input[i] = row; + } + } +} +template void preprocess::interpolate<double>(vtr3<double> &input); + +template <typename T> +void preprocess::interpolate2(vtr3<double> &input) +{ + int maxLen = -1; + + for (auto &&s : input) + { + if ((int)s.size() > maxLen) + maxLen = (int)s.size(); + } + + for (auto &&i : input) + { + int diff = maxLen - (int)i.size(); + + while (diff > 0) + { + int c = 0; + vtr2<T> row; + for (size_t k = 0; k < i.size() - 1 && diff > 0; k++) + { + vtr<T> el; + + if (k % 2 == 1) + { + for (size_t j = 0; j <i[k].size(); j++) + { + T tmp = (i[c - 1][j] + i[c][j]) / 2.0; + el.push_back(tmp); + } + + row.push_back(el); + diff--; + } + else + { + row.push_back(i[c]); + c++; + } + + } + row.insert(row.end(), i.begin() + c, i.end()); + i = row; + } + } +} + +template <typename T> +void preprocess::paa(vtr3<T> &input, size_t ratio) +{ + vtr3<T> output(input.size()); + + for (size_t i = 0; i < input.size(); i++) + { + vtr2<T> s; + for (size_t j = 0; j < input[i].size(); j += ratio)// sequence + { + vtr<T> dim(input[i][j].size()); + + const size_t end = j + ratio >= input[i].size() ? input[i].size() : j + ratio; + for (size_t k = 0; k < input[i][j].size(); k++) //all dims + { + double sum = 0; + int merged = 0; + for (size_t l = j; l < end; l++) //sum individual groups of dims + { + sum += input[i][l][k]; + merged++; + } + dim[k] = (T)(sum / merged); + } + s.push_back(dim); + } + output[i] = s; + } + + input = output; +} +template void preprocess::paa<double>(vtr3<double> &input, size_t ratio); +template void preprocess::paa<int>(vtr3<int> &input, size_t ratio); + +void preprocess::sax(vtr3<double> &input, size_t numClasses) +{ + double max = constant::MIN_double; + double min = constant::MAX_double; + for (auto &&i : input) + { + double tmpMax = calcul::vtr_max(i); + if (tmpMax > max) + max = tmpMax; + + double tmpMin = calcul::vtr_min(i); + if (tmpMin < min) + min = tmpMin; + } + + double step = (max - min) / static_cast<double>(numClasses); + + for (auto &&i : input) + { + for (auto &&j : i) + { + for (auto &&k : j) + { + int c = 1; + double stepCount = min; + while (stepCount < max + step) + { + if (k <= min + c * step) + { + k = min + (c - 1) * step + step / 2; + break; + } + stepCount += step; + c++; + } + } + } + } +} + +//Returns shortened input sequence by skiping some of their elements. +template <typename T> +void preprocess::reduce(vtr3<T> &input, size_t skip) +{ + for (auto &&i : input) + { + vtr2<T> row; + for (size_t j = skip - 1; j < i.size(); j += skip) + { + row.push_back(i[j]); + } + i = row; + } +} +template void preprocess::reduce<double>(vtr3<double> &input, size_t skip); +template void preprocess::reduce<int>(vtr3<int> &input, size_t skip); + +template <typename T> +void preprocess::prolong(vtr3<T> &input, size_t times) +{ + for (size_t i = 0; i < input.size(); i++) //every sequence + { + size_t dims = (int)input[0][0].size(); + + for (size_t j = 0; j < times; j++) //times to prolong + { + size_t alloc = 2 * input[i].size() - 1; + + int counter = 0; + vtr2<T> row(alloc); + for (size_t k = 0; k < 2 * input[i].size() - 1; k++) + { + vtr<T> point(dims); + for (size_t l = 0; l < dims; l++) + { + if (k % 2 == 1) + point[l] = (input[i][counter - 1][l] + input[i][counter][l]) / 2; + else + { + point[l] = input[i][counter][l]; + } + } + if (k % 2 != 1) + counter++; + + row[k] = point; + } + input[i] = row; + } + } +} +template void preprocess::prolong<double>(vtr3<double> &input, size_t times); +template void preprocess::prolong<int>(vtr3<int> &input, size_t times); + +//Returns smoothed sekvence by moving window average. +void preprocess::smooth(vtr3<double> &input, size_t width) +{ + vtr3<double> output(input.size()); + + for (size_t i = 0; i < input.size(); i++) + { + const int dims = (int)input[0][0].size(); + + vtr2<double> s; + for (size_t j = 0; j < width - 1; j++) + { + s.push_back(input[i][j]); + } + + for (size_t j = 0; j < input[i].size() - width + 1; j++)// sequence + { + vtr<double> sums(dims); + for (size_t k = 0; k < width; k++) //all dims + { + for (int l = 0; l < dims; l++) + { + sums[l] += static_cast<double>(input[i][j + k][l] / width); + } + } + s.push_back(sums); + } + output[i] = s; + } + + input = output; +} + +void preprocess::normalizeMany(vtr3<double> &input) +{ + for (size_t i = 0; i < input.size(); i++) //dims + { + normalize(input[i]); + } +} + +void preprocess::normalize(vtr2<double> &input) +{ + for (size_t i = 0; i < input[0].size(); i++) //dims + { + double mean = 0; + for (size_t j = 0; j < input.size(); j++) //lenght of sequence + { + mean += input[j][i]; + } + mean = mean / (double)input.size(); + //mean = abs(mean); + + for (size_t j = 0; j < input.size(); j++) //lenght of sequence + { + input[j][i] = input[j][i] / mean; + } + } +} + +void preprocess::normalizeZeroOne(vtr3<double> &input, double max) +{ + for (size_t i = 0; i < input.size(); i++) + { + for (size_t j = 0; j < input[i].size(); j++) + { + for (size_t k = 0; k < input[i][j].size(); k++) + { + input[i][j][k] /= max; + } + } + } +} + +vtr2<double> preprocess::normalize(vtr2<double> const &input, double coef) +{ + vtr2<double> output(input.size()); + + for (size_t i = 0; i < input.size(); i++) //dims + { + vtr<double> el(input[i].size()); + for (size_t j = 0; j < input[i].size(); j++) //lenght of sequence + { + el[j] = input[i][j] * coef; + } + output[i] = el; + } + + return output; +} \ No newline at end of file diff --git a/SequenceComparison/preprocess.h b/SequenceComparison/preprocess.h new file mode 100644 index 0000000000000000000000000000000000000000..1f1a66a98487c12a18a5e945738f7fa457d37d43 --- /dev/null +++ b/SequenceComparison/preprocess.h @@ -0,0 +1,23 @@ +#pragma once +class preprocess +{ +public: + //Returns input sequence interpolated to same length. + template <typename T> static void interpolate(vtr3<T> &input); + template <typename T> static void interpolate2(vtr3<double> &input); + static void sax(vtr3<double> &input, size_t numClasses); + + template <typename T> static void paa(vtr3<T> &input, size_t ratio); + template <typename T> static void reduce(vtr3<T> &input, size_t skip); + template <typename T> static void prolong(vtr3<T> &input, size_t times); + + //Returns smoothed sekvence by moving window average. + static void smooth(vtr3<double> &input, size_t width); + //Normalize input sequence. + static void normalizeMany(vtr3<double> &input); + static void normalize(vtr2<double> &input); + static vtr2<double> normalize(vtr2<double> const &input, double coef); + //Retruns normalized sequence between <0,1>. + static void normalizeZeroOne(vtr3<double> &input, double max); +}; + diff --git a/SequenceComparison/print.cpp b/SequenceComparison/print.cpp index 14e14685d6d88732ed9e1e96d8bbe8bb43053d28..fb3abd986fc6d91ce62910da7d1434ae41257cd0 100644 --- a/SequenceComparison/print.cpp +++ b/SequenceComparison/print.cpp @@ -199,7 +199,7 @@ string print::matrix(vtr2<T> const &simM, parameter const ¶ms) for (size_t j = 0; j < simM[i].size(); j++) { if (params.scoreType < 2) - stream << setw(params.precision + 5) << fixed << simM[i][j] << " "; + stream << setw(params.precision + 4) << fixed << simM[i][j] << " "; else stream << fixed << simM[i][j] << " "; @@ -213,7 +213,7 @@ string print::matrix(vtr2<T> const &simM, parameter const ¶ms) template string print::matrix(vtr2<int> const &simM, parameter const ¶ms); template string print::matrix(vtr2<double> const &simM, parameter const ¶ms); -string print::tree(vtr<closest> const &tree) +string print::tree(vtr<coordv> const &tree) { std::stringstream ss; @@ -225,7 +225,7 @@ string print::tree(vtr<closest> const &tree) return ss.str(); } -string print::pathShape(string const &path, coords p, size_t lenA, size_t lenB) +string print::pathShape(string const &path, coord p, size_t lenA, size_t lenB) { vtr2<int> tmp(lenA); for (size_t i = 0; i < lenA; i++) diff --git a/SequenceComparison/print.h b/SequenceComparison/print.h index 5f6cf5c281f16b163a743a75b62622e8f61769fa..4949c93cfe99397e9e7e9e4de5cd5d43253027fc 100644 --- a/SequenceComparison/print.h +++ b/SequenceComparison/print.h @@ -19,11 +19,11 @@ public: template <typename T> static std::string matrix(vtr2<T> const &simMatrix, parameter const ¶ms); //Returns formated tree for pdtw method. - static std::string tree(vtr<closest> const &tree); + static std::string tree(vtr<coordv> const &tree); static std::string distanceMatrix(vtr2<node> const &m); - static std::string pathShape(std::string const & path, coords p, size_t lenA, size_t lenB); + static std::string pathShape(std::string const & path, coord p, size_t lenA, size_t lenB); static std::string htmlClusters(vtr3<double> const &input, vtr2<int> const &order, input_clusters const &clusters); diff --git a/SequenceComparison/structs.h b/SequenceComparison/structs.h index 9c6dc64c2b39e9a3dac8395837bbea0561980d13..fb483a4533780e96cb3e75ce2a30736a762a5332 100644 --- a/SequenceComparison/structs.h +++ b/SequenceComparison/structs.h @@ -95,15 +95,13 @@ enum eOperation { op_similarityMatrix_two, op_oneInput, op_twoInput, + op_experiment, //op_segmented, - //op_oneInput_shift, - //op_twoInput_shift, op_queryOne, op_queryMulti, op_dimSimilarityMatrix, op_bestDimSimMatrix, op_pdtw, - op_experiment }; enum eMethod { @@ -116,7 +114,7 @@ class node{ public: double value; //values in distance matrix are saved in float currently...will change if needed - node() : value(std::numeric_limits<double>::max()) {}; + node() : value(constant::MAX_double/*std::numeric_limits<double>::max()*/) {}; node(double value) : value(value) {}; ~node() {}; }; @@ -125,38 +123,100 @@ template<class T> struct node2 { T value; //values in distance matrix are saved in float currently...will change if needed int pathSize; + vtr<bool> pa; //std::string path; node2() : value(std::numeric_limits<T>::max()), pathSize(0) {}; node2(T value) : value(value), pathSize(0) {}; - ~node2() {}; }; +struct tspoint{ + vtr<double> point; + + tspoint() {} + tspoint(size_t size) + { + point = vtr<double>(size); + } + + tspoint(double d) + { + point.push_back(d); + } + + size_t size() const + { + return point.size(); + } + + tspoint& operator+=(tspoint const &p) + { + for (size_t i = 0; i < point.size(); i++) + point[i] += p.point[i]; + + return *this; + } + + tspoint& operator-=(tspoint const &p) + { + for (size_t i = 0; i < point.size(); i++) + point[i] += p.point[i]; + + return *this; + } + + tspoint& operator/(size_t value) + { + for (size_t i = 0; i < point.size(); i++) + point[i] /= value; + + return *this; + } + + bool operator==(tspoint const &p) const + { + if (point == p.point) + return true; + + return false; + } + + bool operator!=(tspoint const &p) const + { + if (point != p.point) + return true; + + return false; + } +}; +static tspoint operator+(tspoint lhs, tspoint const &rhs) { return lhs += rhs; } + struct tserie { - vtr2<double> serie; + vtr<tspoint> serie; tserie(vtr<double> sequence) { serie.resize(sequence.size()); - //serie = vtr2<double>(sequence.size()); - + for (int i = 0; i < sequence.size(); i++) - serie[i].push_back(sequence[i]); + { + //serie[i].point.resize(1); + serie[i] = tspoint(sequence[i]); + } } - tserie(vtr2<double> const &sequence) - { - serie = sequence; - } + //tserie(vtr2<double> const &sequence) + //{ + // serie = sequence; + //} tserie(std::string const &sequence) { auto splits = help::split(sequence, ","); serie.resize(splits.size()); - //serie = vtr2<double>(splits.size()); for (int i = 0; i < splits.size(); i++) - serie[i].push_back(stod(splits[i])); + serie[i] = tspoint(stod(splits[i])); } size_t size() const @@ -164,17 +224,17 @@ struct tserie { return serie.size(); } - size_t dims() const + size_t dim() const { return serie[0].size(); } - const vtr<double>& operator[](size_t i) const + const tspoint& operator[](size_t i) const { return serie[i]; } - vtr<double>& operator[](size_t i) + tspoint& operator[](size_t i) { return serie[i]; } @@ -204,31 +264,30 @@ struct result_time long long write = 0; }; -struct closest { +struct coordv { int row = 0; int col = 0; double value = 0; + + coordv() {} + coordv(int r, int c) : row(r), col(c) {} }; struct result_pdtw { double scoreNorm = 0; - vtr<closest> tree; + vtr<coordv> tree; + vtr2<int> clusters; }; -struct coords { +struct coord { int row = 0; int col = 0; - coords() {} - coords(int r, int c) : row(r), col(c) {} -}; + coord() {} + coord(int r, int c) : row(r), col(c) {} + coord(size_t r, size_t c) : row(static_cast<int>(r)), col(static_cast<int>(r)) {} -struct result_dtw { - vtr<double> score; - std::string path; - cimg_library::CImg<short> matrix_acc; - cimg_library::CImg<short> matrix_noacc; }; template<class T> @@ -242,37 +301,44 @@ struct couple2 { struct result_path { std::string path; //path - vtr<coords> pathCoords; + vtr<coord> pathCoords; vtr<double> values; double scoreRaw = 0; //double pathSum = 0; - coords start; - coords end; + coord start; + coord end; - vtr<coords> convert_toCoords() const - { - vtr<coords> pathCoords(path.size()); + //vtr<coord> convert_toCoords() const + //{ + // vtr<coord> pathCoords(path.size()); - int row = -1, col = -1; - for (size_t i = 0; i < path.size(); i++) - { - if (path[i] == 'M' || path[i] == 'S') { - row++; - col++; - } - else if (path[i] == 'L') - col++; - else if (path[i] == 'U') - row++; + // int row = start.row - 1, col = start.col - 1; + // for (size_t i = 0; i < path.size(); i++) + // { + // if (path[i] == 'M' || path[i] == 'S') { + // row++; + // col++; + // } + // else if (path[i] == 'L') + // col++; + // else if (path[i] == 'U') + // row++; + + // pathCoords[i] = coord(row + 1, col + 1); //x,y(i,j) + // } - pathCoords[i] = coords(row + 1, col + 1); //x,y(i,j) - } + // return pathCoords; + //} - return pathCoords; - } +}; +struct result_dtw { + vtr<double> score; + vtr<result_path> warpings; + cimg_library::CImg<short> matrix_acc; + cimg_library::CImg<short> matrix_noacc; }; struct range { @@ -295,32 +361,13 @@ struct clusterValue std::string name = ""; }; -struct input_clusters -{ - std::map<int, clusterValue> ids; - std::map<int, int> size; - - int getClusterID(size_t idx) const - { - return ids.at((int)idx).idCluster; - } - int getClusterSize(size_t idx) const - { - return size.at(ids.at((int)idx).idCluster); - } - - std::string getID_str(size_t idx) const - { - return ids.at((int)idx).idString; - } -}; struct input_files{ vtr<std::string> query; - vtr<std::string> input; //reference; - vtr<std::string> keyInput; //reference; - vtr<std::string> keyQuery; //reference; + vtr<std::string> input; + vtr<std::string> keyInput; + vtr<std::string> keyQuery; std::string get_inputName(size_t idx) const { @@ -369,6 +416,27 @@ struct input_files{ } }; +struct input_clusters +{ + std::map<int, clusterValue> ids; + std::map<int, int> size; + + int getClusterID(size_t idx) const + { + return ids.at((int)idx).idCluster; + } + + int getClusterSize(size_t idx) const + { + return size.at(ids.at((int)idx).idCluster); + } + + std::string getID_str(size_t idx) const + { + return ids.at((int)idx).idString; + } +}; + struct input_data_single { vtr3<double> input; vtr3<int> key; @@ -436,52 +504,14 @@ struct input_info { idxA = idxA_p; idxB = idxB_p; }; -}; -//struct input_me2thod { -// vtr3<double> A; -// vtr3<double> B; -// vtr3<int> A2; -// vtr3<int> B2; -// std::string nameA; -// std::string nameB; -// size_t idxA; -// size_t idxB; -// -// input_method() {}; -// -// input_method(vtr3<double> const &refA, vtr3<double> const &refB) -// { -// A = refA; -// B = refB; -// //A.push_back(refA); -// //B.push_back(refB); -// } -// -// input_method(vtr2<double> const &refA, vtr2<double> const &refB) -// { -// /*A = refA; -// B = refB;*/ -// A.push_back(refA); -// B.push_back(refB); -// } -// -// input_method(vtr3<double> const &refA, vtr3<double> const &refB, vtr3<int> const &refAKey, vtr3<int> const &refBKey) -// { -// A = refA; -// B = refB; -// A2 = refAKey; -// B2 = refBKey; -// } -// -// input_method(vtr2<double> const &refA, vtr2<double> const &refB, vtr2<int> const &refAKey, vtr2<int> const &refBKey) -// { -// A.push_back(refA); -// B.push_back(refB); -// A2.push_back(refAKey); -// B2.push_back(refBKey); -// } -//}; + input_info(size_t idxA_p, size_t idxB_p, std::string nameA_p, std::string nameB_p) { + idxA = idxA_p; + idxB = idxB_p; + nameA = nameA_p; + nameB = nameB_p; + }; +}; struct result_operation_query { vtr<int> answerID; @@ -505,20 +535,27 @@ struct result_method { vtr<short> img; }; -struct matrix_cluster + + +template<typename ... Ts> +struct TContent { - vtr3<int> matrix; +}; - matrix_cluster(int cols, int rows) - { - /*for (int j = 0; j < ; j++) - { - for (int k = 0; k < (int)data.input.size(); k++) - { - idxs[i][j][k] = k + 1; - } - }*/ - } +template<typename T> +struct TContent<T> +{ + T in; + TContent(T st) : in(st) {}; +}; + +template<typename T, typename ... Next> +struct TContent<T, Next...> +{ + T in; + TContent<Next...> next; + + TContent(T st, Next... np) : in(st), next(np...) {}; }; struct result_operation @@ -529,11 +566,12 @@ struct result_operation vtr2<double> scoreAverageRanks; vtr2<double> scorePrecisions; vtr2<double> scoreRecalls; - vtr<double> scoreMap; vtr<double> scoreMeanAverageRank; vtr<double> scoreMeanPrecision; vtr<double> scoreMeanRecall; - vtr<double> scoreMethod; + vtr<double> scoreMap; + result_dtw dtw; + result_time time; //void calculScores(input_data data, bool scoreReversed) //{ diff --git a/SequenceComparison/templates.h b/SequenceComparison/templates.h index 0e7267f4c1c285e8addaef0307e54ff2bae1189a..f2c320a6b59e9fec3a9ee1b5eb602ef1d8941dcd 100644 --- a/SequenceComparison/templates.h +++ b/SequenceComparison/templates.h @@ -69,14 +69,14 @@ inline double ato<double>(const char* c) { return atof(c); } //struct MyVec { // using type = std::vector<typename MyVec<T, size - 1>::type>; //}; -//Template for multidimensional vectors. -//creates last inner vector +////Template for multidimensional vectors. +////creates last inner vector //template<class T> //struct MyVec<T, 1> { // using type = std::vector<T>; //}; -//Template for multidimensional vectors. -//main template use MyVec template +////Template for multidimensional vectors. +////main template use MyVec template //template<class T, size_t size> //using Vtr = typename MyVec<T, size>::type; //typedef std::map<int, int> mapInt; diff --git a/unit/ut_dtw.cpp b/unit/ut_dtw.cpp index 6653c7b0c52c51598486d98f1bb7d89cd2fdc355..00c6e82c8203c91897bb525c835ca42399894c94 100644 --- a/unit/ut_dtw.cpp +++ b/unit/ut_dtw.cpp @@ -20,11 +20,11 @@ TEST_CASE("score types zero equality") auto result = dtw::main(input, info, params); - CHECK(0.0 == result[0]); - CHECK(0.0 == result[1]); - CHECK(0.0 == result[2]); - CHECK(0.0 == result[3]); - CHECK(0.0 == result[4]); + CHECK(0.0 == result.score[0]); + CHECK(0.0 == result.score[1]); + CHECK(0.0 == result.score[2]); + CHECK(0.0 == result.score[3]); + CHECK(0.0 == result.score[4]); } TEST_CASE("DTW: length score difference 1") @@ -45,11 +45,11 @@ TEST_CASE("DTW: length score difference 1") input_info info; auto result = dtw::main(input, info, params); - CHECK(0.0 != result[0]); - CHECK(0.0 != result[1]); - CHECK(0.0 != result[2]); - CHECK(0.0 != result[3]); - CHECK(0.0 != result[4]); + CHECK(0.0 != result.score[0]); + CHECK(0.0 != result.score[1]); + CHECK(0.0 != result.score[2]); + CHECK(0.0 != result.score[3]); + CHECK(0.0 != result.score[4]); } TEST_CASE("DTW: length score difference 2") @@ -79,11 +79,11 @@ TEST_CASE("DTW: length score difference 2") input_info info; //equal even when diff len (when same data) auto result = dtw::main(input, info, params); - CHECK(0.0 == result[0]); - CHECK(0.0 == result[1]); - CHECK(0.0 != result[2]); - CHECK(0.0 != result[3]); - CHECK(0.0 != result[4]); + CHECK(0.0 == result.score[0]); + CHECK(0.0 == result.score[1]); + CHECK(0.0 != result.score[2]); + CHECK(0.0 != result.score[3]); + CHECK(0.0 != result.score[4]); } TEST_CASE("DTW: warping equality") @@ -132,9 +132,9 @@ TEST_CASE("DTW: warping equality") input_method input(A, B); input_info info; - auto wp = dtw::alignment(input, info, f_distance, params); + auto result = dtw::alignment(input, info, f_distance, params); - CHECK("MUUUMLMMLMLLML" == wp.path); + CHECK("MUUUMLMMLMLLML" == result.warpings[0].path); } /*TEST_METHOD(dtw_matrix_noAccumulation) @@ -200,11 +200,11 @@ TEST_CASE("OP: dtw normal & parallel equality") auto rA = dtw::main(input, info, paramsA); auto rB = dtw::main(input, info, paramsB); - CHECK(rA[0] == rB[0]); - CHECK(rA[1] == rB[1]); - CHECK(rA[2] == rB[2]); - CHECK(rA[3] == rB[3]); - CHECK(rA[4] == rB[4]); + CHECK(rA.score[0] == rB.score[0]); + CHECK(rA.score[1] == rB.score[1]); + CHECK(rA.score[2] == rB.score[2]); + CHECK(rA.score[3] == rB.score[3]); + CHECK(rA.score[4] == rB.score[4]); } //TEST_CASE("dtw_matrix_variations_equality") @@ -364,7 +364,7 @@ TEST_CASE("DTW: accumulated & no accumulation equality") p.localAlignment = true; auto r2 = dtw::main(input, info, p); - if (r1 == r2) + if (r1.score == r2.score) CHECK(true); } diff --git a/unit/ut_op.cpp b/unit/ut_op.cpp index c3c712a97759257d667ac3bd2e692ff4a8196b7d..79693b4f155189312ea8565b14d1dadb47cfdc30 100644 --- a/unit/ut_op.cpp +++ b/unit/ut_op.cpp @@ -18,6 +18,7 @@ TEST_CASE("operation succesfull run") input_info info; parameter params; + params.inPath = vtr<string>(1); params.method = "dtw"; @@ -30,37 +31,45 @@ TEST_CASE("operation succesfull run") params.inPath[0] = pathW; else params.inPath[0] = pathL; - + params.operation = 0; - auto result = mains::main_logic(params); - CHECK(!result.scoreMethod.empty()); + auto result = mains::master_run(params); + CHECK(!result.dtw.score.empty()); params.operation = 1; - result = mains::main_logic(params); + result = mains::master_run(params); CHECK(!result.matrixSimilarity.empty()); + CHECK(result.matrixSimilarity.size() == 5); + CHECK(result.matrixSimilarity[0].size() == 31); + CHECK(result.matrixSimilarity[0][0].size() == 31); params.operation = 2; params.inQuery = vtr<string>(1); params.inQuery[0] = params.inPath[0]; - result = mains::main_logic(params); + result = mains::master_run(params); CHECK(!result.matrixSimilarity.empty()); params.operation = 3; params.inClusterPath = help::pathExists(pathcW) ? pathcW : pathcL; - result = mains::main_logic(params); - CHECK(!result.matrixSimilarity.empty()); - - params.operation = 4; - result = mains::main_logic(params); + result = mains::master_run(params); CHECK(!result.matrixSimilarity.empty()); - + CHECK(result.matrixSimilarity.size() == 5); + CHECK(result.matrixSimilarity[0].size() == 31); + CHECK(result.matrixSimilarity[0][0].size() == 31); + params.operation = 3; params.shift = true; - result = mains::main_logic(params); + result = mains::master_run(params); CHECK(!result.matrixSimilarity.empty()); - + params.operation = 4; - result = mains::main_logic(params); + params.shift = false; + result = mains::master_run(params); + CHECK(!result.matrixSimilarity.empty()); + + params.operation = 4; + params.shift = false; + result = mains::master_run(params); CHECK(!result.matrixSimilarity.empty()); } @@ -91,23 +100,23 @@ TEST_CASE("equality - shift/noshift") params.inPath[0] = pathL; params.operation = 0; - auto result = mains::main_logic(params); + auto result = mains::master_run(params); params.operation = 0; params.shift = true; - auto resultS = mains::main_logic(params); + auto resultS = mains::master_run(params); - check = ut_help::vtr_equal(result.scoreMethod, resultS.scoreMethod, EPS); + check = ut_help::vtr_equal(result.dtw.score, resultS.dtw.score, EPS); CHECK(check); params.operation = 1; params.shift = false; - result = mains::main_logic(params); + result = mains::master_run(params); params.operation = 1; params.shift = true; - resultS = mains::main_logic(params); + resultS = mains::master_run(params); check = ut_help::vtr_equal(result.matrixSimilarity, resultS.matrixSimilarity, EPS); CHECK(check); @@ -117,11 +126,11 @@ TEST_CASE("equality - shift/noshift") params.operation = 2; params.shift = false; - result = mains::main_logic(params); + result = mains::master_run(params); params.operation = 2; params.shift = true; - resultS = mains::main_logic(params); + resultS = mains::master_run(params); check = ut_help::vtr_equal(result.matrixSimilarity, resultS.matrixSimilarity, EPS) ? true : false; CHECK(true); @@ -130,11 +139,11 @@ TEST_CASE("equality - shift/noshift") params.operation = 3; params.shift = false; - result = mains::main_logic(params); + result = mains::master_run(params); params.operation = 3; params.shift = true; - resultS = mains::main_logic(params); + resultS = mains::master_run(params); check = ut_help::vtr_equal(result.matrixSimilarity, resultS.matrixSimilarity, EPS); CHECK(check); @@ -161,11 +170,11 @@ TEST_CASE("equality - shift/noshift") params.operation = 4; params.shift = false; - result = mains::main_logic(params); + result = mains::master_run(params); params.operation = 4; params.shift = true; - resultS = mains::main_logic(params); + resultS = mains::master_run(params); check = ut_help::vtr_equal(result.matrixSimilarity, resultS.matrixSimilarity, EPS); CHECK(check);