diff --git a/SCwin/main.cpp b/SCwin/main.cpp index a15f75dbe390847aa356c4fe485f380380a52a3d..d737b35b38c52fe8d00ce9e4dbd45fbd5203cc44 100644 --- a/SCwin/main.cpp +++ b/SCwin/main.cpp @@ -4,6 +4,10 @@ using namespace std; +///Standard main function +///@param[in] argc number of arguments +///@param[in] argv array of arguments +///@return int int main(int argc, char **argv) { vector<string> args; diff --git a/SequenceComparison/SequenceComparison.vcxitems b/SequenceComparison/SequenceComparison.vcxitems index 3451bcb7474bda1ad4886f3149c9066857b08c92..add7fd55fc358d7fe5d88a872f9b1df2f7b77e84 100644 --- a/SequenceComparison/SequenceComparison.vcxitems +++ b/SequenceComparison/SequenceComparison.vcxitems @@ -17,6 +17,7 @@ <ClInclude Include="$(MSBuildThisFileDirectory)calcul.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)CImg.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)cstruct.h" /> + <ClInclude Include="$(MSBuildThisFileDirectory)distancet.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)parser.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)matrix.h" /> <ClInclude Include="$(MSBuildThisFileDirectory)draw.h" /> diff --git a/SequenceComparison/calcul.h b/SequenceComparison/calcul.h index 8af5acdbfa33cd83b0b4b1bafaacb7cc2f769775..f651bcf93df2a232be08548958e5ef32c5d5c69e 100644 --- a/SequenceComparison/calcul.h +++ b/SequenceComparison/calcul.h @@ -63,83 +63,4 @@ public: static double vtr_std(vtr<double> const &v); }; -//struct distS { -// union DISTANCE { -// DISTANCE_classic classic; ///< classic distance (calculated from two vectors) -// DISTANCE_csiChroma csiChroma; ///< chroma distance (cover song identification) -// DISTANCE_csiChord csiChord; ///< chord distance (cover song identification) -// -// DISTANCE(DISTANCE_classic f) : classic{} {} -// DISTANCE(DISTANCE_csiChroma f) : csiChroma{} {} -// DISTANCE(DISTANCE_csiChord f) : csiChord{} {} -// }; -// -// /*distS(DISTANCE_classic f) : DISTANCE -// distS(DISTANCE_csiChroma f) -// 
distS(DISTANCE_csiChord f) -// distS(int type) -// { -// switch (type) -// { -// case 1: DISTANCE(calcul::distance_dtw_euklid()); break; -// case 2: DISTANCE(calcul::distance_dtw_euklid); break; -// case 3: DISTANCE(calcul::distance_dtw_csiChroma); break; -// case 4: DISTANCE(calcul::distance_dtw_csiChord); break; -// case 5: DISTANCE(calcul::distance_dtw_euklid); break; -// } -// }*/ -// /*constexpr distS(tag<DISTANCE_classic>, DISTANCE_classic f) : classic(f) {} -// constexpr distS(tag<DISTANCE_csiChroma>, DISTANCE_csiChroma f) : csiChroma(f) {} -// constexpr distS(tag<DISTANCE_csiChord>, DISTANCE_csiChord f) : csiChord(f) {} -// constexpr distS(int type) : distS(type == 1 ? distS(tag<DISTANCE_classic>(), calcul::distance_dtw_euklid) : -// type == 2 ? distS(tag<DISTANCE_csiChroma>(), calcul::distance_dtw_csiChroma) : -// distS(tag<DISTANCE_csiChord>(), calcul::distance_dtw_csiChord)) {}*/ -//}; - -///Contains pointer to currently used distance function -struct sdistance { - ///Contains currently used distance. - union DISTANCE { - DISTANCE_classic classic; ///< classic distance (calculated from two vectors) - DISTANCE_csiChroma csiChroma; ///< chroma distance (cover song identification) - DISTANCE_csiChord csiChord; ///< chord distance (cover song identification) - } dist; ///< pointer to distance function - - int type; ///< distance type - - /// default constructor - //sdistance() {} - /// constructor for initialization - ///@param[in] type_ distance type - sdistance(int type_) : dist(), type(0) - { - switch (type_) - { - case 1: dist.classic = calcul::distance_dtw_euklid; type = 1; break; - case 2: dist.classic = calcul::distance_dtw_manhattan; type = 1; break; - case 3: dist.csiChroma = calcul::distance_dtw_csiChroma; type = 2; break; - case 4: dist.csiChord = calcul::distance_dtw_csiChord; type = 3; break; - case 5: dist.classic = calcul::distance_cosine; type = 1; break; - } - } - - ///Calculates distance based on distance type. 
- ///@param[in] input data - ///@param[in] i index - ///@param[in] j index - ///@return distance between two time series points. - double getDistance(input_method const &input, unsigned i, unsigned j) const - { - unsigned row = i - 1; - unsigned col = j - 1; - - if (type == 1) - return dist.classic(input.A[row], input.B[col]); - else if (type == 2) - return dist.csiChroma(input.A[row], input.B[col], 0.07); - else - return dist.csiChord(input.A[row], input.B[col], input.A2[row], input.B2[col]); - } -}; - #endif //CALCUL_H \ No newline at end of file diff --git a/SequenceComparison/distancet.h b/SequenceComparison/distancet.h new file mode 100644 index 0000000000000000000000000000000000000000..6537ce3be7c034370f6ccdfad09d5e804ddcfb5b --- /dev/null +++ b/SequenceComparison/distancet.h @@ -0,0 +1,87 @@ +#ifndef DISTANCET_H +#define DISTANCET_H + +#include "calcul.h" + +///Contains pointer to currently used distance function +struct distancet { + + ///Contains currently used distance. + union DISTANCE { + DISTANCE_classic classic; ///< classic distance (calculated from two vectors) + DISTANCE_csiChroma csiChroma; ///< chroma distance (cover song identification) + DISTANCE_csiChord csiChord; ///< chord distance (cover song identification) + } dist; ///< pointer to distance function + + int type; ///< distance type + + ///Default constructor + //sdistance() {} + ///Initialization constructor + ///@param[in] type_ distance type + distancet(int type_) : dist(), type(0) + { + switch (type_) + { + case 1: dist.classic = calcul::distance_dtw_euklid; type = 1; break; + case 2: dist.classic = calcul::distance_dtw_manhattan; type = 1; break; + case 3: dist.csiChroma = calcul::distance_dtw_csiChroma; type = 2; break; + case 4: dist.csiChord = calcul::distance_dtw_csiChord; type = 3; break; + case 5: dist.classic = calcul::distance_cosine; type = 1; break; + } + } + + ///Calculates distance based on distance type. 
+ ///@param[in] input data + ///@param[in] i index + ///@param[in] j index + ///@return distance between two time series points. + double getDistance(input_method const &input, unsigned i, unsigned j) const + { + unsigned row = i - 1; + unsigned col = j - 1; + + if (type == 1) + return dist.classic(input.A[row], input.B[col]); + else if (type == 2) + return dist.csiChroma(input.A[row], input.B[col], 0.07); + else + return dist.csiChord(input.A[row], input.B[col], input.A2[row], input.B2[col]); + } +}; + +#endif //DISTANCET_H + + +//struct distS { +// union DISTANCE { +// DISTANCE_classic classic; ///< classic distance (calculated from two vectors) +// DISTANCE_csiChroma csiChroma; ///< chroma distance (cover song identification) +// DISTANCE_csiChord csiChord; ///< chord distance (cover song identification) +// +// DISTANCE(DISTANCE_classic f) : classic{} {} +// DISTANCE(DISTANCE_csiChroma f) : csiChroma{} {} +// DISTANCE(DISTANCE_csiChord f) : csiChord{} {} +// }; +// +// /*distS(DISTANCE_classic f) : DISTANCE +// distS(DISTANCE_csiChroma f) +// distS(DISTANCE_csiChord f) +// distS(int type) +// { +// switch (type) +// { +// case 1: DISTANCE(calcul::distance_dtw_euklid()); break; +// case 2: DISTANCE(calcul::distance_dtw_euklid); break; +// case 3: DISTANCE(calcul::distance_dtw_csiChroma); break; +// case 4: DISTANCE(calcul::distance_dtw_csiChord); break; +// case 5: DISTANCE(calcul::distance_dtw_euklid); break; +// } +// }*/ +// /*constexpr distS(tag<DISTANCE_classic>, DISTANCE_classic f) : classic(f) {} +// constexpr distS(tag<DISTANCE_csiChroma>, DISTANCE_csiChroma f) : csiChroma(f) {} +// constexpr distS(tag<DISTANCE_csiChord>, DISTANCE_csiChord f) : csiChord(f) {} +// constexpr distS(int type) : distS(type == 1 ? distS(tag<DISTANCE_classic>(), calcul::distance_dtw_euklid) : +// type == 2 ? 
distS(tag<DISTANCE_csiChroma>(), calcul::distance_dtw_csiChroma) : +// distS(tag<DISTANCE_csiChord>(), calcul::distance_dtw_csiChord)) {}*/ +//}; \ No newline at end of file diff --git a/SequenceComparison/dtw.cpp b/SequenceComparison/dtw.cpp index e6edc3dd87bb642abe50071f1d020ec5c53d4fb7..b3d39137cb61fef0ef38e33ebf2a6f3a5aa1b0a5 100644 --- a/SequenceComparison/dtw.cpp +++ b/SequenceComparison/dtw.cpp @@ -12,11 +12,11 @@ using namespace std; #undef min #undef max -///Main entry point function for dtw method +///Calculates dtw (main entry point function for dtw method). ///@param[in] input input data -///@param[in] info data informations +///@param[in] info input data informations ///@param[in] params parameters -///@return dtw results. +///@return dtw results result_dtw dtw::main(input_method const &input, input_info const &info, parameter const ¶ms) { result_dtw result; @@ -29,11 +29,11 @@ result_dtw dtw::main(input_method const &input, input_info const &info, paramete return result; } -///Calculates pair dtw for pair of time series. +///Calculates dtw for the pair of time series. ///@param[in] input input data -///@param[in] info data informations +///@param[in] info input data informations ///@param[in] params parameters -///@return dtw results. +///@return dtw results result_dtw dtw::main_pair(input_method const &input, input_info const &info, parameter const ¶ms) { if ((int)((input.A.size() * input.B.size()) / 131072) > params.ram) //131072 to convert bytes to MB @@ -60,9 +60,9 @@ result_dtw dtw::main_pair(input_method const &input, input_info const &info, par ///Segments input time series and calculates dtw above all pairs of found sub time series. ///@param[in] input input data -///@param[in] info data information +///@param[in] info input data information ///@param[in] params parameters -///@return dtw results. 
+///@return dtw results result_dtw dtw::main_segment(input_method const &input, input_info const &info, parameter const &params) { //segments @@ -94,13 +94,13 @@ result_dtw dtw::main_segment(input_method const &input, input_info const &info, return result[0][0]; } -///Configuration of alignment (type of alignment). +///Chooses type of alignment. ///@param[in] input input data ///@param[in] params parameters -///@return dtw results. +///@return dtw results result_dtw dtw::configure(input_method const &input, parameter const &params) { - sdistance d(params.distance); + distancet d(params.distance); result_dtw result; result_path warping; @@ -114,10 +114,10 @@ result_dtw dtw::configure(input_method const &input, parameter const &params) ///Calculates alignment of input time series. ///@param[in] input data -///@param[in] dist distance type +///@param[in] dist distance type (euklid, manhattan, CSI: chord, chroma) ///@param[in] params parameters -///@return dtw results. -result_dtw dtw::alignment(input_method const &input, sdistance const &dist, parameter const &params) +///@return dtw results +result_dtw dtw::alignment(input_method const &input, distancet const &dist, parameter const &params) { auto m = dtw::matrix(input, dist, params); auto end = getEnds(m, params); @@ -149,11 +149,11 @@ result_dtw dtw::alignment(input_method const &input, sdistance const &dist, para } ///Calculates local alignments for input time series. -///@param[in] input data -///@param[in] dist distance type +///@param[in] input data +///@param[in] dist distance type (euklid, manhattan, CSI: chord, chroma) ///@param[in] params parameters -///@return dtw results. 
-result_dtw dtw::alignment_local(input_method const &input, sdistance const &dist, parameter const &params) +///@return dtw results +result_dtw dtw::alignment_local(input_method const &input, distancet const &dist, parameter const &params) { auto m = dtw::matrix_noaccumulation(input, dist, params); auto minims = dtw::get_minimums(m, params); @@ -186,7 +186,7 @@ result_dtw dtw::alignment_local(input_method const &input, sdistance const &dist ///Calculates all final scores (s1 - s5). ///@param[in] input input data ///@param[in] warpings warping paths from which final scores are calculated -///@return dtw score results. +///@return dtw score results vtr<double> dtw::getScore(input_method const &input, vtr<result_path> const &warpings) { vtr<double> score(5); @@ -208,10 +208,10 @@ vtr<double> dtw::getScore(input_method const &input, vtr<result_path> const &war ///Calculates accumulated distance matrix. ///@param[in] input input data -///@param[in] distance distance type +///@param[in] distance distance type (euklid, manhattan, CSI: chord, chroma) ///@param[in] params parameters -///@return 2d distance matrix. -vtr2<node> dtw::matrix(input_method const &input, sdistance const &distance, parameter const &params) +///@return 2d distance matrix +vtr2<node> dtw::matrix(input_method const &input, distancet const &distance, parameter const &params) { int lenA = (int)input.A.size(); int lenB = (int)input.B.size(); @@ -257,12 +257,12 @@ vtr2<node> dtw::matrix(input_method const &input, sdistance const &distance, par return m; } -//////Calculates non-accumulated distance matrix. +///Calculates non-accumulated distance matrix. ///@param[in] input input data -///@param[in] dist distance type +///@param[in] dist distance type (euklid, manhattan, CSI: chord, chroma) ///@param[in] params parameters -///@return 2d distance matrix. 
-vtr2<node> dtw::matrix_noaccumulation(input_method const &input, sdistance const &dist, parameter const ¶ms) +///@return 2d distance matrix +vtr2<node> dtw::matrix_noaccumulation(input_method const &input, distancet const &dist, parameter const ¶ms) { int lenA = (int)input.A.size(); int lenB = (int)input.B.size(); @@ -329,7 +329,7 @@ void dtw::accumulate(vtr2<node> &m, parameter const ¶ms) } } -///Accumulates non-accumulated distance matrix. TODO +///Accumulates non-accumulated distance matrix (version for the distance matrix where minimums (Kocyan) are zeroed). ///@param[in] m non-accumulated distance matrix ///@param[in] minims local minimums found in non-accumulated distance matrix (Kocyan). ///@param[in] params parameters @@ -354,7 +354,7 @@ void dtw::accumulate_mod(vtr2<node> &m, vtr<coord> const &minims, parameter cons } } -///Finds minimums in non-accumulated distance matrix. (Kocyan) +///Finds minimums (defined by Kocyan) in non-accumulated distance matrix. ///@param[in] m input distance matrix ///@param[in] params parameters ///@return all minimums found in non-accumulated distance matrix (Kocyan). @@ -414,11 +414,11 @@ vtr<coord> dtw::get_minimums(vtr2<node> const &m, parameter const ¶ms) return filtered; } -///Finds warping path in accumulated distance matrix. +///Finds warping path in the accumulated distance matrix. ///@param[in] m accumulated distance matrix -///@param[in] coords start coordinations of warping path (from where warping path is searched... so its actually end). +///@param[in] coords coordinations from where is the warping path searched (bottom right corner -> so its actually end) ///@param[in] params parameters -///@return generated warping path from accumulated distance matrix. 
+///@return generated warping path from accumulated distance matrix result_path dtw::getWarping(vtr2<node> const &m, coord coords, parameter const &params) { result_path warping; @@ -504,11 +504,11 @@ result_path dtw::getWarping(vtr2<node> const &m, coord coords, parameter const & return warping; } -///Finds all warping paths from warping paths start coordinations. +///Finds all warping paths from the warping paths start coordinates. ///@param[in] m accumulated distance matrix -///@param[in] minims local minimums found in non-accumulated distance matrix (Kocyan). +///@param[in] minims local minimums found in the non-accumulated distance matrix (Kocyan) ///@param[in] params parameters -///@return generated warping paths from accumulated distance matrices. +///@return generated warping paths vtr<result_path> dtw::getWarpings(vtr2<node> const &m, vtr<coord> const &minims, parameter const &params) { vtr<result_path> paths(minims.size()); @@ -523,7 +523,7 @@ vtr<result_path> dtw::getWarpings(vtr2<node> const &m, vtr<coord> const &minims, return paths; } -///Filters warping paths. Filters out sub path and overlapping path (local dtw, Kocyan). +///Filters found warping paths from minimums (local dtw, Kocyan). ///@param[in] m input matrix ///@param[in] warpings warping paths ///@param[in] params parameters @@ -660,10 +660,10 @@ void dtw::filterWarpings(vtr2<node> const &m, vtr<result_path> &warpings, parame warpings = filterSame(); } -///Finds ends of warping path. +///Finds end of the warping path. ///@param[in] m input matrix ///@param[in] params parameters -///@return warping path end coordinations. (from where warping path starts generation). +///@return warping path end coordinates (bottom right corner). 
coord dtw::getEnds(vtr2<node> const &m, parameter const ¶ms) { double min = constant::MAX_double; @@ -792,7 +792,6 @@ coord dtw::getEnds(vtr2<node> const &m, parameter const ¶ms) // return back; //} - ///Calculates accumulated distance matrix by using tilting optimization technique. EXPERIMENTAL ///@param[in] A time series ///@param[in] B time series @@ -866,7 +865,6 @@ coord dtw::getEnds(vtr2<node> const &m, parameter const ¶ms) // return back; //} - ///Calculates accumulated distance matrix by using diagonal filling optimization technique. EXPERIMENT ///@param[in] A time series ///@param[in] B time series diff --git a/SequenceComparison/dtw.h b/SequenceComparison/dtw.h index 2e18b4c15186dfad6ba9df127e3fb85598471466..333681319f65160f5a84a97d16888c563831b695 100644 --- a/SequenceComparison/dtw.h +++ b/SequenceComparison/dtw.h @@ -4,6 +4,7 @@ #include "structs.h" #include "parameter.h" #include "calcul.h" +#include "distancet.h" ///Contains DTW methods class dtw @@ -15,11 +16,11 @@ public: static result_dtw configure(input_method const &input, parameter const ¶ms); static vtr<double> getScore(input_method const &input, vtr<result_path> const &warpings); - static result_dtw alignment(input_method const &input, sdistance const &dist, parameter const ¶ms); - static result_dtw alignment_local(input_method const &input, sdistance const &dist, parameter const ¶ms); + static result_dtw alignment(input_method const &input, distancet const &dist, parameter const ¶ms); + static result_dtw alignment_local(input_method const &input, distancet const &dist, parameter const ¶ms); - static vtr2<node> matrix(input_method const &input, sdistance const &dist, parameter const ¶ms); - static vtr2<node> matrix_noaccumulation(input_method const &input, sdistance const &dist, parameter const ¶ms); + static vtr2<node> matrix(input_method const &input, distancet const &dist, parameter const ¶ms); + static vtr2<node> matrix_noaccumulation(input_method const &input, distancet const &dist, 
parameter const &params); //static result_path matrix_tiled(vtr2<double> const &A, vtr2<double> const &B, parameter const &params); //static result_path matrix_memoized(vtr2<double> const &A, vtr2<double> const &B, parameter const &params); diff --git a/SequenceComparison/entrypoint.h b/SequenceComparison/entrypoint.h index 435769fde8766ab01e815f87c085b255e1d03fe8..76dad7c8673b8fb06469b2f8a44c797b26b74ee3 100644 --- a/SequenceComparison/entrypoint.h +++ b/SequenceComparison/entrypoint.h @@ -22,6 +22,7 @@ #pragma warning Unknown dynamic link import/export semantics. #endif +///Contains results for external call. struct result_lib { double arr; int len; @@ -29,6 +30,7 @@ struct result_lib { result_lib() : arr(0), len(0) {}; }; + EXPORT void lib_entrypoint_cmd(char* argv) { std::string strargv(argv); diff --git a/SequenceComparison/help.cpp b/SequenceComparison/help.cpp index 30f5939605c453cc899774d91315a877706681b5..38d26b2f26c5024e1297dad9f7fcd7fda9da1ce5 100644 --- a/SequenceComparison/help.cpp +++ b/SequenceComparison/help.cpp @@ -10,7 +10,7 @@ using namespace std; #undef min ///Checks if path exists. -///@param path file path +///@param[in] path file path ///@return TRUE if path exists bool help::isPath(std::string path) { @@ -19,7 +19,7 @@ bool help::isPath(std::string path) } ///Checks if path belongs to folder. -///@param path file path +///@param[in] path file path ///@return TRUE if path is folder bool help::isFolder(std::string path) { @@ -28,7 +28,7 @@ bool help::isFolder(std::string path) } ///Checks if path belongs to file. -///@param path file path +///@param[in] path file path ///@return TRUE if path is file bool help::isFile(std::string path) { @@ -41,7 +41,7 @@ bool help::isFile(std::string path) } ///Strips file name from path. 
-///@param path file path +///@param[in] path file path ///@return file path without file name string help::stripFileNameFromPath(std::string path) { @@ -49,8 +49,8 @@ string help::stripFileNameFromPath(std::string path) } ///Trims white space form the left side of the string. -///@param s string -///@param white whitespace characters. +///@param[in] s string +///@param[in] white whitespace characters. void help::trimLeft(string &s, string const &white) { const size_t startpos = s.find_first_not_of(white); @@ -61,8 +61,8 @@ void help::trimLeft(string &s, string const &white) } ///Trims white space form the right side of the string. -///@param s string -///@param white whitespace character +///@param[in] s string +///@param[in] white whitespace character void help::trimRight(string &s, string const &delimiters) { const size_t endpos = s.find_last_not_of(delimiters); @@ -73,9 +73,9 @@ void help::trimRight(string &s, string const &delimiters) } } -///Trims white space form both sides of the string. -///@param s string -///@param white whitespace character +///Trims white spaces from both sides of the string. +///@param[in] s string +///@param[in] white string containing whitespace characters void help::trim(string &s, string const &white) { trimLeft(s, white); @@ -84,8 +84,8 @@ void help::trim(string &s, string const &white) ///Splits string by selected delimiters. ///@param[in] s string -///@param[in] delimiters string delimiters -///@return split string by character delimiters +///@param[in] delimiters array containing delimiters +///@return splitted strings vector<string> help::split(string const& s, char const *delimiters) { vector<string> output; @@ -122,7 +122,7 @@ vector<string> help::split(string const& s, char const *delimiters) return output; } -///Check if string contains BOM characters and if yes then removes them. +///Checks if string contains BOM characters and if yes then removes them (please use encoding without BOM or UTF-8). 
///@param[in] s string void help::correctBomLine(string &s) { @@ -148,7 +148,7 @@ void help::correctBomLine(string &s) } } -///generates random real number. +///Generates random real number. ///@param[in] min minimal generated number ///@param[in] max maximal generated number ///@return random real number @@ -161,7 +161,7 @@ double help::random_real(int min, int max) return dis(gen); } -///generates random integer number. +///Generates random integer number. ///@param[in] min minimal generated number ///@param[in] max maximal generated number ///@return random int number @@ -174,20 +174,20 @@ int help::random_int(int min, int max) return dis(gen); } -///generates random time series. +///Generates random time series. ///@param[in] len time series length ///@param[in] dims number of dimensions -///@param[in] min minimal value of time series element -///@param[in] max maximal value of time series element +///@param[in] min minimal value of the time series element +///@param[in] max maximal value of the time series element ///@return random time series -vtr2<double> help::random_series(unsigned len, unsigned dims, int min, int max) +vtr2<double> help::random_series(int len, int dims, int min, int max) { vtr2<double> ts(len); - for (size_t i = 0; i < len; i++) + for (size_t i = 0; i < (size_t)len; i++) { vtr<double> point(dims); - for (size_t j = 0; j < dims; j++) + for (size_t j = 0; j < (size_t)dims; j++) { point[j] = help::random_real(min, max); ts[i] = (point); @@ -197,7 +197,7 @@ vtr2<double> help::random_series(unsigned len, unsigned dims, int min, int max) return ts; } -///Initialize 1d vector. +///Initializes 1d vector. ///@param[in] size length of the vector ///@return 1d vector of specified size template<class T> @@ -206,7 +206,7 @@ vtr<T> help::vtr_init(size_t size) return vtr<T>(size); } -///Initialize 2d vector. +///Initializes 2d vector. 
///@param[in] size1 length of the vector ///@param[in] size2 length of the sub vectors ///@return 2d vector of specified sizes @@ -225,7 +225,7 @@ vtr2<T> help::vtr_init(size_t size1, size_t size2) ///@param[in] size1 length of the vector ///@param[in] size2 length of the sub vectors ///@param[in] value initialization value -///@return 2d vector of specified sizes +///@return 2d vector of specified sizes and value template<class T> void help::vtr_init(vtr2<T> &m, size_t size1, size_t size2, T value) { @@ -238,7 +238,7 @@ void help::vtr_init(vtr2<T> &m, size_t size1, size_t size2, T value) } template void help::vtr_init<float>(vtr2<float> &m, size_t size1, size_t size2, float value); -///Initialize 3d vector. +///Initializes 3d vector. ///@param[in] size1 length of the vector ///@param[in] size2 length of the sub vectors ///@param[in] size3 length of the sub sub vectors @@ -255,10 +255,10 @@ vtr3<T> help::vtr_init(size_t size1, size_t size2, size_t size3) template vtr3<int> help::vtr_init<int>(size_t size1, size_t size2, size_t size3); template vtr3<double> help::vtr_init<double>(size_t size1, size_t size2, size_t size3); -///Initialize two dimensions of 3d vector. +///Initializes two dimensions of 3d vector. ///@param[in] size1 length of the vector ///@param[in] size2 length of the sub vectors -///@return 3d vector with initialized 2 dimensions +///@return 3d vector with initialized first 2 dimensions template<class T> vtr3<T> help::vtr_initPartial(size_t size1, size_t size2) { @@ -287,9 +287,9 @@ template vtr3<double> help::vtr_initPartial<double>(size_t size1, size_t size2); // return output; //} -///Separates time series dimensions into separate time series. +///Separates time series dimensions into the separate time series. 
///@param[in] input input time series -///@return separated time series +///@return set of separated time series vtr3<double> help::separateSequences(vtr2<double> const &input) { vtr3<double> output; @@ -361,7 +361,7 @@ vtr2<double> help::convert_arr2d(double* const &series, unsigned len, unsigned d ///Sorts vector and secondary vector follows sorting of the first vector ///@param[in] lead sorted vector -///@param[in] follow vector which copy sorting of the first vector +///@param[in] follow vector which follows sorting of the first vector ///@param[in] reversed if true: sorting direction will be reversed void help::sortFollow(vtr<double> &lead, vtr<int> &follow, bool reversed) { @@ -385,9 +385,9 @@ void help::sortFollow(vtr<double> &lead, vtr<int> &follow, bool reversed) } //Sorts vector and secondary vector follows sorting of the first vector -//@param[in] lead sorted vector -//@param[in] follow vector which copy sorting of the first vector -//@param[in] reversed if true: sorting direction will be reversed +//@param[in] lead vector to be sorted +//@param[in] +//@param[in] //template <typename T> //vtr2<T> help::vtr_degrade(vtr3<T> const &input) //{ @@ -405,7 +405,7 @@ void help::sortFollow(vtr<double> &lead, vtr<int> &follow, bool reversed) // return v; //} -///Alters 3d matrix from [i][j][k] to [k][i][j] for easier manipulation with sub vectors. +///Alters 3d matrix from [i][j][k] to [k][i][j] for easier manipulation with other dimensions in some situations (used in operation 2). 
///@param[in] matrix 3d vector for dimension reordering ///@return altered 3d matrix template<typename T> diff --git a/SequenceComparison/help.h b/SequenceComparison/help.h index c8ad6f5c98357037238a062f11d47fa5e8bcb02b..06199e27afe7104e71f00bddf87b287ba8b12601 100644 --- a/SequenceComparison/help.h +++ b/SequenceComparison/help.h @@ -24,7 +24,7 @@ public: static int random_int(int min, int max); static double random_real(int min, int max); - static vtr2<double> random_series(unsigned len, unsigned dims, int min, int max); + static vtr2<double> random_series(int len, int dims, int min, int max); static vtr2<double> convert_arrd(double* const &series, unsigned len); static vtr2<double> convert_arr2d(double* const &series, unsigned len, unsigned dims); diff --git a/SequenceComparison/lcss.cpp b/SequenceComparison/lcss.cpp index a2be8ee40004cf70ffbf84c0df72ae40caf3ad94..b5f2779ade10e51d9e14e556bb6a0217775db459 100644 --- a/SequenceComparison/lcss.cpp +++ b/SequenceComparison/lcss.cpp @@ -9,10 +9,10 @@ using namespace std; -///Main entry point for lcss method. -///@param input time series to be analyzed -///@param info information about input files -///@param params parameters +///Calculates lcss method (main entry point for lcss method). +///@param[in] input time series to be analyzed +///@param[in] info information about input files +///@param[in] params parameters ///@return method results result_dtw lcss::main(input_method const &input, input_info const &info, parameter const &params) { @@ -38,10 +38,10 @@ result_dtw lcss::main(input_method const &input, input_info const &info, paramet return result; } -///Wraps alignment building. ///Builds distance matrix and searched it for warping path. -///@param input time series to be analyzed -///@param params parameters -///@return dtw method results +///Calculates alignment for input time series. 
+///@param[in] input time series to be analyzed +///@param[in] params parameters +///@return alignment results result_dtw lcss::alignment(input_method const &input, parameter const ¶ms) { DISTANCE_LCSS distance = calcul::distance_lcss; @@ -52,7 +52,7 @@ result_dtw lcss::alignment(input_method const &input, parameter const ¶ms) cout << endl << print::distanceMatrix(m); vtr<result_path> warping(1); - warping[0] = get_warping(m, input); + warping[0] = get_warping(m, coord(m.size() - 1, m[0].size() - 1), input); warping[0].scoreRaw = m[input.A.size()][input.B.size()].value; result_dtw result; @@ -66,10 +66,10 @@ result_dtw lcss::alignment(input_method const &input, parameter const ¶ms) return result; } -///Wraps alignment building. ///Builds distance matrix and searched it for warping path. -///@param input time series to be analyzed -///@param warping warping path -///@return final alignment scores between 2 time series +///Calculates final alignment scores. +///@param[in] input time series to be analyzed +///@param[in] warping warping path +///@return alignment scores vtr<double> lcss::getScore(input_method const &input, vtr<result_path> const &warping) { vtr<double> score; @@ -81,11 +81,11 @@ vtr<double> lcss::getScore(input_method const &input, vtr<result_path> const &wa return score; } -///Builds distance matrix. -///@param input time series to be analyzed -///@param distance distance function -///@param params parameters -///@return final alignment scores between 2 time series +///Builds accumulated distance matrix. 
+///@param[in] input time series +///@param[in] distance distance function +///@param[in] params parameters +///@return accumulated distance matrix vtr2<node> lcss::matrix(input_method const &input, DISTANCE_LCSS distance, parameter const ¶ms) { vtr2<node> m(input.A.size() + 1); @@ -121,10 +121,10 @@ vtr2<node> lcss::matrix(input_method const &input, DISTANCE_LCSS distance, param return m; } -///Builds non-accumulated distance matrix and searches it for warping path. -///@param input time series to be analyzed -///@param distance distance function -///@param params paramteres +///Builds non-accumulated distance matrix. +///@param[in] input time series to be analyzed +///@param[in] distance distance function +///@param[in] params paramteres ///@return non-accumulated distance matrix vtr2<node> lcss::matrix_noaccumulation(input_method const &input, DISTANCE_LCSS distance, parameter const ¶ms) { @@ -154,60 +154,69 @@ vtr2<node> lcss::matrix_noaccumulation(input_method const &input, DISTANCE_LCSS return m; } -///Generates warping path from distance matrix. -///@param m accumulated distance matrix -///@param input time series -///@return non-accumulated distance matrix -result_path lcss::get_warping(vtr2<node> const &m, input_method const &input) +///Generates warping path from the accumulated distance matrix. 
+///@param[in] m accumulated distance matrix +///@param[in] coords coordinations from where is the warping path searched (bottom right corner -> so its actually end) +///@param[in] input time series +///@return warping path +result_path lcss::get_warping(vtr2<node> const &m, coord coords, input_method const &input) { - int i = (int)input.A.size(); - int j = (int)input.B.size(); - string path = ""; + result_path warping; + warping.end = coords; - while (i > 0 && j > 0) + while (coords.row > 1 && coords.col > 1) { - if (m[i - 1][j - 1].value < m[i][j].value) - { - if (to_string(input.A[i - 1][0]) == "-" || to_string(input.B[j - 1][0]) == "-") - path = "M" + path; - else if (input.A[i - 1][0] == input.B[j - 1][0]) - path = "M" + path; - else - path = "S" + path; - - i--; - j--; + warping.pathCoords.push_back(coord(coords.row, coords.col)); + + /*if (m[coords.row - 1][coords.col - 1].value < m[coords.row][coords.col].value) + {*/ + /*if (to_string(input.A[coords.row - 1][0]) == "-" || to_string(input.B[coords.col - 1][0]) == "-") + path = "M" + path;*/ + if (input.A[coords.row - 1][0] == input.B[coords.col - 1][0]){ + warping.path = "M" + warping.path; + /*else + path = "S" + path;*/ + + coords.row--; + coords.col--; } else { - if (m[i - 1][j].value >= m[i][j - 1].value) + if (m[coords.row - 1][coords.col].value >= m[coords.row][coords.col - 1].value) { - path = "U" + path; - i--; + warping.path = "U" + warping.path; + coords.row--; } else/* if (m[i][j - 1].value == m[i][j].size)*/ { - path = "L" + path; - j--; + warping.path = "L" + warping.path; + coords.col--; } } } - while (i > 0) + while (coords.row > 1) { - path = "U" + path; - i--; + warping.path = "U" + warping.path; + warping.pathCoords.push_back(coord(coords.row, coords.col)); + coords.row--; } - while (j > 0) + while (coords.col > 1) { - path = "L" + path; - j--; + warping.path = "L" + warping.path; + warping.pathCoords.push_back(coord(coords.row, coords.col)); + coords.col--; } - result_path wp; - wp.path = 
path; - wp.scoreRaw = m[input.A.size()][input.B.size()].value; - - return wp; + warping.path = "M" + warping.path; + warping.pathCoords.push_back(coord(coords.row, coords.col)); + coords.row--; + coords.col--; + + warping.scoreRaw = m[input.A.size()][input.B.size()].value; + + std::reverse(warping.pathCoords.begin(), warping.pathCoords.end()); + + return warping; } \ No newline at end of file diff --git a/SequenceComparison/lcss.h b/SequenceComparison/lcss.h index ec67ebb634d5dacb867c9f1c4b403159ec49e404..0639976b83336d33ea110833dee5b441226ed438 100644 --- a/SequenceComparison/lcss.h +++ b/SequenceComparison/lcss.h @@ -6,7 +6,7 @@ typedef double(*DISTANCE_LCSS)(vtr<double> const &A, vtr<double> const &B, int idx); -///Contains lcss method and it's sub functions. +///Contains lcss functions. class lcss { public: @@ -17,7 +17,7 @@ public: static vtr2<node> matrix(input_method const &input, DISTANCE_LCSS d, parameter const &params); static vtr2<node> matrix_noaccumulation(input_method const &input, DISTANCE_LCSS d, parameter const &params); - static result_path get_warping(vtr2<node> const &m, input_method const &input); + static result_path get_warping(vtr2<node> const &m, coord coords, input_method const &input); }; #endif // LCSS_H \ No newline at end of file diff --git a/SequenceComparison/mains.cpp b/SequenceComparison/mains.cpp index eaee9d07984ebbd2ff698d0fd0800c9815c84c52..51005407be7d9caa639453a2bcb74bed6e100e75 100644 --- a/SequenceComparison/mains.cpp +++ b/SequenceComparison/mains.cpp @@ -18,7 +18,7 @@ using namespace std; ///Main entry point of the application. -///@param[in] args passed application arguments +///@param[in] args application arguments void mains::master(vtr<string> const &args) { auto script = parser::parseScript(args); @@ -36,7 +36,7 @@ void mains::master(vtr<string> const &args) } } -///Main entry point of the application. +///Main logic function of the application. 
///@param[in] params parameters ///@param[in] unit if true: called from unit test application ///@return operation results @@ -70,7 +70,7 @@ result_operation mains::master_run(parameter const &params, bool unit) return result; } -///Data loading. +///Loads input data. ///@param[in] params parameters ///@return input data input_data mains::parseData(parameter const &params) @@ -78,10 +78,10 @@ input_data mains::parseData(parameter const &params) input_data data; try { - data.files.input = parser::getAllFileNames(params.inPath); //file path of all input files - data.files.query = parser::getAllFileNames(params.inQuery); //file path of all input files - data.files.keyInput = parser::getAllFileNames(params.inKeyInput); //file path of all input files - data.files.keyQuery = parser::getAllFileNames(params.inKeyQuery); //file path of all input files + data.files.input = parser::getAllFileNames(params.inPath); //file path of all input files + data.files.query = parser::getAllFileNames(params.inQuery); //file path of all input files + data.files.keyInput = parser::getAllFileNames(params.inKeyInput); //file path of all input files + data.files.keyQuery = parser::getAllFileNames(params.inKeyQuery); //file path of all input files data.files.sort(); data.input = parser::readData<double>(data.files.input); //parsing @@ -93,7 +93,7 @@ input_data mains::parseData(parameter const &params) if (params.isQuery()) { - data.query = parser::readData<double>(data.files.query); //parsing + data.query = parser::readData<double>(data.files.query); //parsing if (data.query.size() < 1) throw runtime_error("problem occurred when loading query data"); @@ -122,8 +122,8 @@ input_data mains::parseData(parameter const &params) cout << print::vector(data.files.query); cout << print::vector(data.files.input); - cout << print::input(data.query, 30) << endl; // print for debug comment or delete if not needed - cout << print::input(data.input, 30) << endl; // print for debug comment or delete if not needed + 
cout << 
print::input(data.query, 30) << endl; //print for debug comment or delete if not needed + cout << print::input(data.input, 30) << endl; //print for debug comment or delete if not needed cout << print::inputStats(data.input, 30); cout << print::inputStats(data.query, 30); } @@ -131,7 +131,7 @@ input_data mains::parseData(parameter const &params) return data; } -///Operation execution. +///Executes chosen operation. ///@param[in] data input data ///@param[in] params parameters ///@return operation results @@ -149,7 +149,7 @@ result_operation mains::run(input_data const &data, parameter const &params) return result; } -///Main logic for external calls. +///Main logic for external calls. EXPERIMENT ///@param[in] input input data ///@param[in] args application arguments ///@return operation results @@ -190,7 +190,7 @@ result_operation mains::extern_logic(vtr3<double> const &input, vtr<string> cons return result; } -///Data preprocessing. +///Preprocesses input data (depends on used switches). ///@param[in] data input data ///@param[in] params parameters void mains::preprocess(input_data &data, parameter const& params) @@ -223,16 +223,14 @@ void mains::preprocess(input_data &data, parameter const& params) preprocess::prolong<int>(data.keyQuery, params.pre_prolong); } - //if (params.isZNormalization()) - // help::normalizeMany(input); if (params.pre_interpolate) { preprocess::interpolate(data.input); preprocess::interpolate(data.query); } if (params.isNormalizeBy()) { - preprocess::normalizeBy(data.input, params.pre_normalize_by); - preprocess::normalizeBy(data.query, params.pre_normalize_by); + preprocess::normalizeBy(data.input, params.pre_normalizeBy); + preprocess::normalizeBy(data.query, params.pre_normalizeBy); } if (params.pre_smooth) { @@ -241,7 +239,7 @@ void mains::preprocess(input_data &data, parameter const& params) } } -///Final results printing to the console. +///Prints final results to the console. 
///@param[in] result operation results ///@param[in] params parameters void mains::printResult(result_operation &result, parameter const &params) @@ -263,10 +261,10 @@ void mains::printResult(result_operation &result, parameter const &params) cout << print::timeMeasures(result.time); } -///Final results printing to the disk. +///Writes final results to the disk. ///@param[in] data input data ///@param[in] result operation results -///@param[in] times elapsed times measurements +///@param[in] times elapsed times measurements during various phases ///@param[in] params parameters void mains::writeResult(input_data const &data, result_operation &result, result_time &times, parameter const &params) { @@ -295,7 +293,7 @@ void mains::writeResult(input_data const &data, result_operation &result, result ///Contains predefined constat data structures. namespace cstruct { - ///Contains RGB color + ///Contains 15 basic RGB colors. extern const float colorsBase[15][3] = { { 0, 255, 0 }, //green { 190, 190, 0 }, //darker yellow @@ -346,8 +344,8 @@ namespace cstruct "789EC9", "6D80BA", "953F00", "5EFF03", "E4FFFC", "1BE177", "BCB1E5", "76912F" }; - ///Contains chord scale (used in CSI chroma distance, switch: -d 3) - extern const vtr2<bool> scaleChord{ + ///Contains chord scale (used in the CSI chroma distance, switch: -dist 3). + extern const vtr2<bool> scaleChord { { 1,0,0,0,1,0,0,1,0,0,0,0 }, //12 major triad scale vectors { 0,1,0,0,0,1,0,0,1,0,0,0 }, { 0,0,1,0,0,0,1,0,0,1,0,0 }, @@ -375,7 +373,7 @@ namespace cstruct { 0,0,1,0,0,0,1,0,0,0,0,1 }, }; - ///Contains key scale (used in CSI chord distance, switch: -d 4) + ///Contains key scale (used in the CSI chord distance, switch: -dist 4). 
extern const vtr2<bool> scaleKey{ { 1,0,1,0,1,1,0,1,0,1,0,1 }, //12 major triad scale vectors { 1,1,0,1,0,1,1,0,1,0,1,0 }, @@ -390,21 +388,21 @@ namespace cstruct { 1,0,1,1,0,1,0,1,0,1,1,0 }, { 0,1,0,1,1,0,1,0,1,0,1,1 }, - { 1,0,1,1,0,1,0,1,1,0,1,0 }, //12 minor triad scale vectors - { 0,1,0,1,1,0,1,0,1,1,0,1 }, - { 1,0,1,0,1,1,0,1,0,1,1,0 }, - { 0,1,0,1,0,1,1,0,1,0,1,1 }, - { 1,0,1,0,1,0,1,1,0,1,0,1 }, - { 1,1,0,1,0,1,0,1,1,0,1,0 }, - { 0,1,1,0,1,0,1,0,1,1,0,1 }, - { 1,0,1,1,0,1,0,1,0,1,1,0 }, - { 0,1,0,1,1,0,1,0,1,0,1,1 }, - { 1,0,1,0,1,1,0,1,0,1,0,1 }, - { 1,1,0,1,0,1,1,0,1,0,1,0 }, - { 0,1,1,0,1,0,1,1,0,1,0,1 }, + //{ 1,0,1,1,0,1,0,1,1,0,1,0 }, //12 minor triad scale vectors + //{ 0,1,0,1,1,0,1,0,1,1,0,1 }, + //{ 1,0,1,0,1,1,0,1,0,1,1,0 }, + //{ 0,1,0,1,0,1,1,0,1,0,1,1 }, + //{ 1,0,1,0,1,0,1,1,0,1,0,1 }, + //{ 1,1,0,1,0,1,0,1,1,0,1,0 }, + //{ 0,1,1,0,1,0,1,0,1,1,0,1 }, + //{ 1,0,1,1,0,1,0,1,0,1,1,0 }, + //{ 0,1,0,1,1,0,1,0,1,0,1,1 }, + //{ 1,0,1,0,1,1,0,1,0,1,0,1 }, + //{ 1,1,0,1,0,1,1,0,1,0,1,0 }, + //{ 0,1,1,0,1,0,1,1,0,1,0,1 }, }; - ///Contains circle of fifth distance (used in chord distance, switch: -d 4). + ///Contains circle of fifth distances (used in the chord distance, switch: -dist 4). extern const std::map<int, int> cofDistance = { { 0,0 }, { 1,5 }, diff --git a/SequenceComparison/operation.cpp b/SequenceComparison/operation.cpp index da201c1ee6d386d631ea79d19bb087eff8e619dd..25bf4e094f8358dbd4bc1a879378c4efc61d024c 100644 --- a/SequenceComparison/operation.cpp +++ b/SequenceComparison/operation.cpp @@ -15,8 +15,8 @@ using namespace std; -///Entry function for input input without query part. Called for operations 0, 1, 3 and 5. -///@param[in] data data WITHOUT query part +///Main operation logic for input without query part. 
+///@param[in] data input data WITHOUT query part ///@param[in] params paramters ///@return operation results result_operation operation::main(input_data_single const &data, parameter const ¶ms) @@ -39,7 +39,7 @@ result_operation operation::main(input_data_single const &data, parameter const case 2: //op 2 result = clustering(data, params, f1); break; - case 3: + case 3: //op 3 result = pdtw_localClustering(data, params, f1); //(Kocyan) break; default: @@ -54,8 +54,8 @@ result_operation operation::main(input_data_single const &data, parameter const return result; } -///Entry function for input data with query part. Called for operations 2, 4. -///@param[in] data data WITH query part +///Entry function for input data with query part. +///@param[in] data input data WITH query part ///@param[in] params paramters ///@return operation results result_operation operation::main(input_data const &data, parameter const ¶ms) @@ -91,10 +91,10 @@ result_operation operation::main(input_data const &data, parameter const ¶ms return result; } -///Calls classic pair dtw function. +///Calls classic pair dtw/lcss function. ///@param[in] data data WITHOUT query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::dtw(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -108,9 +108,9 @@ result_operation operation::dtw(input_data_single const &data, parameter const & } ///Calls local dtw function (Kocyan). 
-///@param[in] data data WITHOUT query part -///@param[in] params paramters -///@param[in] f1 function pointer for currently used method (dtw, lcss) +///@param[in] data input data WITHOUT query part +///@param[in] params parameters +///@param[in] f1 function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::pdtw_localClustering(input_data_single const &data, parameter const ¶ms, METHOD f1) { @@ -171,10 +171,10 @@ result_operation operation::pdtw_localClustering(input_data_single const &data, return resultop; } -///Generates similarity matrix (not to confuse with distance matrix) by using dtw or lcss method internally. -///@param[in] data data WITHOUT query part +///Generates similarity matrix (not to confuse with distance matrix) by using dtw or lcss methods internally. +///@param[in] data input data WITHOUT query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::similarityMatrix(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -207,11 +207,11 @@ result_operation operation::similarityMatrix(input_data_single const &data, para return result; } -///Generates similarity matrix (not to confuse with distance matrix) by using dtw or lcss method internally. +///Generates similarity matrix (not to confuse with distance matrix) by using dtw or lcss methods internally. ///Use OpenMP for parallelization in order to optimize performance. 
-///@param[in] data data WITHOUT query part -///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] data input data WITHOUT query part +///@param[in] params parameters +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::similarityMatrix_omp(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -243,10 +243,10 @@ result_operation operation::similarityMatrix_omp(input_data_single const &data, return result; } -///Generates similarity matrix (not to confuse with distance matrix) by using dtw or lcss method internally. -///@param[in] data data WITH query part -///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///Generates similarity matrix (not to confuse with the distance matrix) by using dtw or lcss methods internally. +///@param[in] data input data WITH query part +///@param[in] params parameters +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::similarityMatrix(input_data const &data, parameter const ¶ms, METHOD f) { @@ -259,47 +259,12 @@ result_operation operation::similarityMatrix(input_data const &data, parameter c for (int i = 0; i < (int)data.query.size(); i++) { cout << setw(5) << i + 1 << " "; - //double best = double_max; for (int j = 0; j < (int)data.input.size(); j++) { - /*double meanA = calcul::ts_mean(inputA[i]); - double meanB = calcul::ts_mean(inputB[j]); - - double coef = meanA / meanB; - - vtr2<double> inputBcoef; - if (params.isZNormalization()) { - inputBcoef = help::normalize(inputB[j], coef);*/ - - //double tmp; - /*if (params.lowerBound) { - tmp = calcul::lb_keogh(inputA[i], inputB[j], params); - - if (tmp < best) { - tmp = f(inputA[i], inputB[j], params); - - if (tmp < best) - best = tmp; - cout << "!"; - } - } - else { - tmp = f(inputA[i], inputB[j], params); - cout << "!!"; - }*/ 
- - - //tmp = f(inputA[i], inputBcoef, params); - /* if(params.isDebugInfo()) - cout << print::timeSeries(inputBcoef) << endl; - } - else*/ - input_method input(data, i, j); input_info info(i, j); - auto tmp = f(input, info, params)/*[params.scoreType]*/; - + auto tmp = f(input, info, params); matrix[i][j] = tmp.score; if (params.scoreType < 2) @@ -316,11 +281,11 @@ result_operation operation::similarityMatrix(input_data const &data, parameter c return result; } -///Generates similarity matrix (not to confuse with distance matrix) by using dtw or lcss method internally. +///Generates similarity matrix (not to confuse with the distance matrix) by using dtw or lcss methods internally. ///Use OpenMP for parallelization in order to optimize performance. -///@param[in] data data WITH query part +///@param[in] data input data WITH query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::similarityMatrix_omp(input_data const &data, parameter const ¶ms, METHOD f) { @@ -347,11 +312,11 @@ result_operation operation::similarityMatrix_omp(input_data const &data, paramet return result; } -///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 3). -///Scores are calculated from internally generated similarity matrix. -///@param[in] data data WITHOUT query part +///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 2). +///Scores are calculated from the internally generated similarity matrix. 
+///@param[in] data input data WITHOUT query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::clustering(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -401,12 +366,12 @@ result_operation operation::clustering(input_data_single const &data, parameter return result; } -///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 3). -///Scores are calculated from internally generated similarity matrix. +///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 2). +///Scores are calculated from the internally generated similarity matrix. ///Uses shifting in point comparison in order to improve results. -///@param[in] data data WITHOUT query part +///@param[in] data input data WITHOUT query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::clustering_shift(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -467,11 +432,11 @@ result_operation operation::clustering_shift(input_data_single const &data, para return result; } -///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 4). -///Scores are calculated from internally generated similarity matrix. -///@param[in] data data WITH query part +///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 2). +///Scores are calculated from the internally generated similarity matrix. 
+///@param[in] data input data WITH query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::clustering(input_data const &data, parameter const ¶ms, METHOD f) { @@ -506,12 +471,12 @@ result_operation operation::clustering(input_data const &data, parameter const & return result; } -///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 4). -///Scores are calculated from internally generated similarity matrix. +///Calculates map, rank, recall and precision scores for input data by using ground truth informations (operation 2). +///Scores are calculated from the internally generated similarity matrix. ///Uses point shifting in order to improve results precision. ///@param[in] data data WITH query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::clustering_shift(input_data const &data, parameter const ¶ms, METHOD f) { @@ -541,10 +506,10 @@ result_operation operation::clustering_shift(input_data const &data, parameter c return result; } -///Calculates similarity matrix (not to confuse with distance matrix) by point shifting. -///@param[in] data data WITHOUT query part +///Calculates similarity matrix (not to confuse with the distance matrix) by point shifting. 
+///@param[in] data input data WITHOUT query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::similarityMatrix_shift_omp(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -568,9 +533,9 @@ result_operation operation::similarityMatrix_shift_omp(input_data_single const & } ///Calculates dtw method by using point shifting. -///@param[in] data data WITHOUT query part +///@param[in] data input data WITHOUT query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::dtw_shift(input_data_single const &data, parameter const ¶ms, METHOD f) { @@ -608,10 +573,10 @@ result_operation operation::dtw_shift(input_data_single const &data, parameter c return result; } -///Calculates similarity matrix (not to confuse with distance matrix) by point shifting. -///@param[in] data data WITH query part -///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///Calculates similarity matrix (not to confuse with the distance matrix) by point shifting (parallelized by OpenMP). +///@param[in] data input data WITH query part +///@param[in] params parameters +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::similarityMatrix_shift_omp(input_data const &data, parameter const ¶ms, METHOD f) { @@ -639,9 +604,9 @@ result_operation operation::similarityMatrix_shift_omp(input_data const &data, p } ///Operation for testing Keogh's lower bound optimization technique. 
EXPERIMENTAL -///@param[in] data data WITH query part +///@param[in] data input data WITH query part ///@param[in] params paramters -///@param[in] f function pointer for currently used method (dtw, lcss) +///@param[in] f function pointer for used method (dtw, lcss) ///@return operation results result_operation operation::query_omp(input_data const &data, parameter const ¶ms, METHOD f) { @@ -652,7 +617,8 @@ result_operation operation::query_omp(input_data const &data, parameter const &p int size = (int)(data.query.size() * data.input.size()); omp_set_num_threads(params.omp); -#pragma omp parallel for schedule(dynamic, 1) /*num_threads(params.threads)*//* proc_bind(spread)*/ + + #pragma omp parallel for schedule(dynamic, 1) /*num_threads(params.threads)*//* proc_bind(spread)*/ for (int g = 0; g < size; g++) { int i = g / (int)data.query.size(); @@ -980,4 +946,36 @@ result_operation operation::query_omp(input_data const &data, parameter const &p // //result.matrixSimilarity = matrix; // // return result_shifts; -//} \ No newline at end of file +//} +/*double meanA = calcul::ts_mean(inputA[i]); +double meanB = calcul::ts_mean(inputB[j]); + +double coef = meanA / meanB; + +vtr2<double> inputBcoef; +if (params.isZNormalization()) { +inputBcoef = help::normalize(inputB[j], coef);*/ + +//double tmp; +/*if (params.lowerBound) { +tmp = calcul::lb_keogh(inputA[i], inputB[j], params); + +if (tmp < best) { +tmp = f(inputA[i], inputB[j], params); + +if (tmp < best) +best = tmp; +cout << "!"; +} +} +else { +tmp = f(inputA[i], inputB[j], params); +cout << "!!"; +}*/ + + +//tmp = f(inputA[i], inputBcoef, params); +/* if(params.isDebugInfo()) +cout << print::timeSeries(inputBcoef) << endl; +} +else*/ \ No newline at end of file diff --git a/SequenceComparison/parameter.cpp b/SequenceComparison/parameter.cpp index 5f3590536fd06d788ca500338947f253e27e4706..616b19098be63a5247cfe4db43f0b5d55cdf7547 100644 --- a/SequenceComparison/parameter.cpp +++ b/SequenceComparison/parameter.cpp @@ 
-7,9 +7,9 @@ using namespace std; namespace fs = std::experimental::filesystem; -///Parse and process input command line parameters. -///@param[in] args vector of input arguments. -///@return input paramters in the form of parameter class. +///Parse and process input command line arguments. +///@param[in] args vector of input arguments +///@return input paramters in the form of parameter class void parameter::setParameters(vtr<string> const &args) { arguments = args; @@ -44,9 +44,9 @@ void parameter::setParameters(vtr<string> const &args) } } -///Returns mapped pairs: switch;value from input parameters. -///@param[in] args vector of input parametrs. -///@return map which contains switch;argument pairs. +///Maps pairs of switch;value from the input parameters. +///@param[in] args vector of input arguments +///@return map with switch;argument pairs map<string, string> parameter::mapParameters(vtr<string> const &args) { map<string, string> map; @@ -113,7 +113,7 @@ map<string, string> parameter::mapParameters(vtr<string> const &args) //} ///Enforces basic conditions necessary for parameter use. If conditions are not followed runtime error is thrown. -///@param[in] mapSetting map containing switch;argument pairs. +///@param[in] mapSetting map containing switch;argument pairs void parameter::checkParameters(map<string, string> const &mapSetting) { if (method != "dtw" && method != "lcss" && method != "pdtw") @@ -228,8 +228,8 @@ void parameter::checkUnknownParameters() throw runtime_error("invalid number of arguments for switch"); } -///Sets parsed input command line arguments. -///@param[in] mapSetting map containing switch;argument pairs. +///Sets parsed input command line arguments into the paramter class. 
+///@param[in] mapSetting map containing switch;argument pairs void parameter::useParameters(map<string, string> &mapSetting) { if (mapSetting.size() < 1) { @@ -252,8 +252,8 @@ void parameter::useParameters(map<string, string> &mapSetting) delta = mapSetting.count("-d") > 0 ? stof(mapSetting.at("-d")) : delta; omp = mapSetting.count("-omp") > 0 ? stoi(mapSetting.at("-omp")) : omp; precision = mapSetting.count("-p") > 0 ? stoi(mapSetting.at("-p")) : precision; - pre_normalize_by = mapSetting.count("-nby") > 0 ? stoi(mapSetting.at("-nby")) : pre_normalize_by; - pre_normalize_z = mapSetting.count("-nz") > 0 ? true : false; + pre_normalizeBy = mapSetting.count("-nby") > 0 ? stoi(mapSetting.at("-nby")) : pre_normalizeBy; + pre_normalizeZ = mapSetting.count("-nz") > 0 ? true : false; pre_reduce = mapSetting.count("-r") > 0 ? stoi(mapSetting.at("-r")) : pre_reduce; pre_paa = mapSetting.count("-paa") > 0 ? stoi(mapSetting.at("-paa")) : pre_paa; pre_sax = mapSetting.count("-sax") > 0 ? stoi(mapSetting.at("-sax")) : pre_sax; @@ -264,7 +264,7 @@ void parameter::useParameters(map<string, string> &mapSetting) pre_smooth = mapSetting.count("-smooth") > 0 ? stoi(mapSetting.at("-smooth")): 0; subsequence = mapSetting.count("-sub") > 0 ? stod(mapSetting.at("-sub")) : subsequence; //print = mapSetting.count("-print") > 0 ? mapSetting.at("-print") : params.print; - recall = mapSetting.count("-recall") > 0 ? stoi(mapSetting.at("-recall")) : recall; + //recall = mapSetting.count("-recall") > 0 ? stoi(mapSetting.at("-recall")) : recall; block = mapSetting.count("-block") > 0 ? stoi(mapSetting.at("-block")) : 0; distance = mapSetting.count("-dist") > 0 ? stoi(mapSetting.at("-dist")) : distance; scoreReversed = mapSetting.count("-reverse") > 0 ? true : false; @@ -308,7 +308,7 @@ void parameter::useParameters(map<string, string> &mapSetting) } } -///Updates loaded parameters by priority paramaeters. Used for script priority parameters. +///Updates loaded parameters by priority parameters. 
Used for script priority parameters. ///@param[in] args vector of input arguments ///@param[in] argsPriority vector of input priority (overwrites arguments values from args) arguments ///@return updated command line arguments of priority arguments @@ -356,7 +356,7 @@ bool parameter::isOmp() const return false; } -///@return TRUE if write output path is set. +///@return TRUE if output path for results is set. bool parameter::isWriteOutput() const { if (outputPath != "") @@ -437,10 +437,10 @@ bool parameter::isPassFlexible() const return false; } -///@return TRUE if normalization 01 switch is set +///@return TRUE if normalizationBy switch is set bool parameter::isNormalizeBy() const { - if (pre_normalize_by > 0) + if (pre_normalizeBy > 0) return true; return false; @@ -460,7 +460,7 @@ void parameter::printHelp() cout << "-out [output file path] - file name without ext (extern call not supported)" << endl; cout << "-draw [output file/folder path] - file or folder path for distance matrix graphic visualization (folder - names are generated for all matrices, file - name is reused/overwritten) (extern call not supported)" << endl; cout << "-dmin - draw minimums in distance matrix." << endl; - cout << "-gflex - draw flexible warping pass (see: -fw, -fd)." << endl; + cout << "-dflex - draw flexible warping pass (see: -fw, -fd)." << endl; cout << "-dsleep [ms] - sleep time after bmp file is written to the disc" << endl; cout << "-gdf [output file path] - file name without ext (extern call not supported)" << endl; cout << "-html [html output file path] - file name without ext (extern call not supported)" << endl; @@ -501,5 +501,5 @@ void parameter::printHelp() cout << "-script [file path [additional switches]] - path to script file, possible to use additional params which are applied to all script lines (overwrites same line parameters)." << endl; cout << " If switch is used multiple times last instance have higher priority." 
<< endl; cout << "-help - prints help (overrides everything)" << endl; - cout << "note: order of switches is irrelevant (write them as they pop in your mind)." << endl; + cout << "note: order of switches is irrelevant." << endl; } \ No newline at end of file diff --git a/SequenceComparison/parameter.h b/SequenceComparison/parameter.h index 8efb3787f64857ef0db51d191058580b8f535094..4b95b1db1b5820b66c6900cd15926b5874367ca8 100644 --- a/SequenceComparison/parameter.h +++ b/SequenceComparison/parameter.h @@ -12,80 +12,79 @@ public: vtr<std::string> arguments; ///< Contains command line arguments //file input/output parameters - vtr<std::string> inPath; ///< Contains file input paths from which are loaded and parsed analyzed input data. switch: -in - vtr<std::string> inQuery; ///< Contains file input paths from which are loaded and parsed analyzed query input data. switch: -query - vtr<std::string> inKeyInput; ///< Contains file input paths from which are loaded and parsed analyzed secondary input data (used when distance 4: chord is used). switch: -in2 - vtr<std::string> inKeyQuery; ///< Contains file input paths from which are loaded and parsed analyzed secondary query input data (used when distance 4: chord is used). switch: -query2 - std::string inGroundTruthPath; ///< Contains ground truth file path. switch: -gt - std::string outputPath; ///< Contains path for output files. switch: -out - std::string outDraw; ///< Contains path for graphic output files. switch: -draw - bool drawMin; ///< if true: local minimums will be included in graphic visualization of the distance matrix. switch: -dmin - bool drawFlex; ///< if true: flexible warping pass will be included in graphic visualization of the distance matrix. switch: -dflex - int drawSleep; ///< sleep n milliseconds after picture is saved (written to disk) - bool forceSingle; ///< if true: input_data_single is used even if query data are specified (just for convenience). 
+ vtr<std::string> inPath; ///< Contains file input paths from which are loaded and parsed analyzed input data (-in) + vtr<std::string> inQuery; ///< Contains file input paths from which are loaded and parsed analyzed query input data (-query) + vtr<std::string> inKeyInput; ///< Contains file input paths from which are loaded and parsed analyzed secondary input data (-dist 4, -in2) + vtr<std::string> inKeyQuery; ///< Contains file input paths from which are loaded and parsed analyzed secondary query input data (-dist 4, -query2) + std::string inGroundTruthPath; ///< Contains ground truth file path (-gt) + std::string outputPath; ///< Contains path for output files (-out) + std::string outDraw; ///< Contains path for graphic output files (-draw) + bool drawMin; ///< if true: local minimums will be included in graphic visualization of the distance matrix (-dmin) + bool drawFlex; ///< if true: flexible warping pass will be included in graphic visualization of the distance matrix (-dflex) + int drawSleep; ///< sleep n milliseconds after picture is written to the disk (-dsleep) + bool forceSingle; ///< if true: input_data_single is used even if query data are specified (-single). - bool out_gdf; ///< if true: gdf output will be generated. switch: -gdf - bool out_html; ///< if true: html output will be generated. switch: -html + bool out_gdf; ///< if true: gdf output will be generated (-gdf) + bool out_html; ///< if true: html output will be generated (-html) //print output pararameters - int precision; ///< Sets floating precision for console outputs. - bool time; ///< if true: time measurements will be printed. switch: -time - bool printOutput; ///< if true: standard outputs will be printed. switch: -pout - bool debugInfo; ///< if true: more detailed information will be printed during operation. Used for debug purposes.
switch: -debug + int precision; ///< Sets floating precision for console outputs (-p) + bool time; ///< if true: time measurements will be printed (-time) + bool printOutput; ///< if true: standard outputs will be printed (-pout) + bool debugInfo; ///< if true: more detailed information will be printed during operation. Used for debug purposes (-debug) - bool shift; ///< if true: during distance matrix generation point dimension are rotated. switch: -shift - bool scoreReversed; ///< if true: score should be reversed from 0,1 to 1,0. WARNING: not all scores are compatible with this (example: s1 (raw score)). - bool localAlignment; ///< if true: local alignment will be used for warping path search (Kocyan). + bool shift; ///< if true: during distance matrix generation point dimension are rotated (-shift) + bool scoreReversed; ///< if true: score should be reversed from 0,1 to 1,0. (-reverse) WARNING: not all scores are compatible with this + bool localAlignment; ///< if true: local alignment will be used for warping path search (Kocyan). //experimental options - int block; ///< Sets size of block for tilting optimization. EXPERIMENTAL - bool tmp_exp; ///< if true: temporal - bool experiment; ///< if true: run experimental code (development). EXPERIMENTAL - bool simd; ///< if true: SIMD instructions are used for distance matrix calculation. switch: -simd EXPERIMENTAL - bool memoization; ///< if true: memoization optimization technique will be used for distance matrix calculation. switch: -mem EXPERIMENTAL - bool lowerBound; ///< if true: Keogh's lower bound will be used for query operation (1,2). switch: -lb EXPERIMENTAL + int block; ///< Sets size of block for tilting optimization (-block) EXPERIMENTAL + bool tmp_exp; ///< if true: temporal + bool experiment; ///< if true: run experimental code (development). 
EXPERIMENTAL + bool simd; ///< if true: SIMD instructions are used for distance matrix calculation (-simd) EXPERIMENTAL + bool memoization; ///< if true: memoization optimization technique will be used for distance matrix calculation (-mem) EXPERIMENTAL + bool lowerBound; ///< if true: Keogh's lower bound will be used for query operation (-lb) EXPERIMENTAL - std::string method; ///< used method (dtw, lcss). switch: -m - int operation; ///< chosen operation type. switch: -op - int distance; ///< chosen distance function type. switch: -shift: -d "distance type" - int scoreType; ///< chosen score type. switch: -s "score type" - int ram; ///< Sets maximal RAM used for distance matrix. Default: 5GB. switch: -ram "MBs" - int clusters; ///< Sets number of clusters at which pdtw algorithm stops in the operation 5. switch: -clu "size"; - int ve_window; ///< Sets voting experts length per vote (Kocyan). switch: -velen "length" - int ve_smooth; ///< Set smoothing window width when segmenting time series by voting experts algorithm. switch: -smooth "size" + std::string method; ///< Specifies used method: dtw, lcss (-m) + int operation; ///< Specifies operation type (-op) + int distance; ///< Specifies distance function type (-dist) + int scoreType; ///< Specifies score type (-s) + int ram; ///< Sets maximal RAM used for distance matrix (-ram MBs, default: 5000MB) + int clusters; ///< Sets number of clusters at which pdtw algorithm stops in the operation 5 (-clu) + int ve_window; ///< Sets voting experts length per vote (Kocyan) (-velen) + int ve_smooth; ///< Set smoothing window width when segmenting time series by voting experts algorithm (-smooth) //result options - //std::string matrixDataType; ///< data type of nodes in distance matrix + //std::string matrixDataType; ///< data type of nodes in distance matrix - double w; ///< Sets warping windows width (dtw). switch: -w "size" - int fw; ///< Sets flexible warping pass width (Kocyan). 
switch: -fw "size" - int fd; ///< Sets flexibility parameter for flexible warping pass (Kocyan). switch: -fd "size" - double delta; ///< Sets delta paramter for lcss method. switch: -d "size" - double epsilon; ///< Sets epsilon parameter for lcss method. switch: -e "size" - double subsequence; ///< Sets minimal time series length difference (ratio) for subsequence to be used. switch: -sub "size" - double treshold_csi; ///< threshold: minimal tone value in chroma distance (chroma distance -d 3). switch: -tcsi "size" - double treshold_t; ///< threshold: maximal allowed threshold warping path value (Kocyan). switch: -tt "size" - double treshold_a; ///< threshold: maximal allowed average warping path value (Kocyan). switch: -ta "size" - double treshold_e; ///< threshold: maximal allowed warping path point value (Kocyan). switch: -te "size" - double treshold_l; ///< threshold: maximal allowed warping path length (Kocyan custom). switch: -tl "size" + double w; ///< Sets warping windows width for dtw method (-w size) + int fw; ///< Sets flexible warping pass width (Kocyan) (-fw size) + int fd; ///< Sets flexibility parameter for flexible warping pass (Kocyan) (-fd size) + double delta; ///< Sets delta parameter for lcss method (-d size) + double epsilon; ///< Sets epsilon parameter for lcss method (-e size) + double subsequence; ///< Sets minimal time series length difference (ratio) for subsequence to be used (-sub size) + double treshold_csi; ///< threshold: minimal tone value in chroma distance (-d 3, -tcsi size) + double treshold_t; ///< threshold: maximal allowed threshold warping path value (Kocyan) (-tt size) + double treshold_a; ///< threshold: maximal allowed average warping path value (Kocyan) (-ta size) + double treshold_e; ///< threshold: maximal allowed warping path point value (Kocyan) (-te size) + double treshold_l; ///< threshold: maximal allowed warping path length (Kocyan custom) (-tl size) - int recall; ///< first top x - int relax; ///< size of
relaxation + //int recall; ///< first top x + int relax; ///< Specifies relaxation for dtw method: how much can start/end of alignment move from first/last cell of distance matrix (-relax size) - //data preprocessing options - int pre_paa; ///< if set: input time series will be reduced by piecewise aggregate approximation. switch: -paa "width" - int pre_reduce; ///< if set: input time series will be reduced by skipping elements for length reductions (2: every 2. element is taken, 3: every 3. el. is taken, ect..). switch: -r "skip size" - int pre_sax; ///< if set: input time series will be preprocessed by symbolic approximation. switch -sax "levels" - int pre_prolong; ///< if set: input time series will be prolonged by interlacing. switch: -pr "times" - int pre_smooth; ///< if set: input time series will be smoothed by moving window averages. switch: -smooth "window width" - bool pre_interpolate; ///< if true: input time series will be interpolated to the same length. switch: -i - bool pre_normalize_z; ///< if true: input time series will be z-normalized. switch: -nz - int pre_normalize_by; ///< if set: input time series will be 01-normalized. switch: -n01 "value by which to normalize" + //data preprocessing options + int pre_paa; ///< if set: input time series will be reduced by piecewise aggregate approximation (-paa "width") + int pre_reduce; ///< if set: input time series will be reduced by skipping elements for length reductions (-r skip, 2: every 2. element is taken, 3: every 3. el. is taken, ...) 
+ int pre_sax; ///< if set: input time series will be preprocessed by symbolic approximation (-sax levels) + int pre_prolong; ///< if set: input time series will be prolonged by interlacing (-pr times) + int pre_smooth; ///< if set: input time series will be smoothed by moving window averages (-smooth size) + bool pre_interpolate; ///< if true: input time series will be interpolated to the same length (-i) + bool pre_normalizeZ; ///< if true: input time series will be z-normalized (-nz) + int pre_normalizeBy; ///< if set: input time series will be 01-normalized (-nby value) //parallelization options - int omp; ///< set number of threads for omp - int mpi; ///< set number of nodes ?? for mpi... NOT IMPLEMENTED + int omp; ///< sets number of threads used for similarity matrix parallelization. parameter() : arguments(), @@ -97,9 +96,9 @@ public: block(0), tmp_exp(false), experiment(false), simd(false), memoization(false), lowerBound(false), method("dtw"), operation(0), distance (1), scoreType(0), ram(5000), clusters(1), ve_window(0), ve_smooth(0), w(1), fw(0), fd(1), delta(100000), epsilon(1), subsequence(0), treshold_csi(0.07), treshold_t(10), treshold_a(10), treshold_e(10), treshold_l(3), - recall(10), relax(0), - pre_paa(1), pre_reduce(0), pre_sax(0), pre_prolong(0), pre_smooth(0), pre_interpolate(), pre_normalize_z(), pre_normalize_by(-1), - omp(1), mpi(1) + /*recall(10),*/ relax(0), + pre_paa(1), pre_reduce(0), pre_sax(0), pre_prolong(0), pre_smooth(0), pre_interpolate(), pre_normalizeZ(), pre_normalizeBy(-1), + omp(1) {}; void setParameters(vtr<std::string> const &args); diff --git a/SequenceComparison/parser.cpp b/SequenceComparison/parser.cpp index 60acff8704a81688f1674fdae5c802f7a683529c..b195fcbde8fa1a3dc3580b607beb344f76fad014 100644 --- a/SequenceComparison/parser.cpp +++ b/SequenceComparison/parser.cpp @@ -105,8 +105,8 @@ vector<string> parser::readFileByLine(string const &path) return input; } -///Parse data file to specific type. 
-///@tparam type into which will be data file parsed +///Parses data file to specific type. +///@tparam type into which will be data parsed ///@param[in] filePath input file path ///@return time series template <typename T> @@ -167,7 +167,7 @@ vtr2<T> parser::parseDataFile(std::string const &filePath) } ///Parse data from multiple input files to specific type. -///@tparam type to which will be data files parsed +///@tparam type to which will be data parsed ///@param[in] files input file paths ///@return time series template <typename T> @@ -217,7 +217,7 @@ template vtr3<double> parser::readData(vtr<std::string> const &files); ///Generates ground truth data from input file names and ground truth file (constains ids contained in file names). ///@param[in] fileNames input file paths ///@param[in] path ground truth file path -///@return ground truth +///@return ground truths input_groundTruth parser::parseGroundTruth(vtr<string> const &fileNames, string path) { input_groundTruth clusters; diff --git a/SequenceComparison/pdtw.cpp b/SequenceComparison/pdtw.cpp index a4622f039744a6f01f9dd2ff3ccd72bdb87b6366..c959ed5d9fcb47d4f66cee821c80d18d92d71c35 100644 --- a/SequenceComparison/pdtw.cpp +++ b/SequenceComparison/pdtw.cpp @@ -10,7 +10,7 @@ using namespace std; -///Entry point to pdtw function. Hierarchically clusters input time series based on calculated similarity matrix. +///Main entry point for pdtw function. Hierarchically clusters input time series based on the calculated similarity matrix. 
///@param[in] input time series to be clustered by pdtw method ///@param[in] params parameters ///@return method results @@ -24,7 +24,7 @@ result_pdtw pdtw::main(vtr3<double> const &input, parameter const &params) return alignment(input, params); } -///Hierarchically clusters input time series based on calculated similarity matrix +///Hierarchically clusters input time series based on the calculated similarity matrix ///@param[in] input time series to be clustered by pdtw method ///@param[in] params parameters ///@return method results @@ -62,7 +62,7 @@ result_pdtw pdtw::alignment(vtr3<double> const &input, parameter const &params) return result; } -///Calculates similarity matrix which is used as base for hierarchically clustering. +///Calculates similarity matrix which is used as base for hierarchical clustering. ///@param[in] input time series to be clustered by pdtw method ///@param[in] params parameters ///@return similarity matrices (depends on how many score functions are defined) @@ -80,7 +80,7 @@ vtr3<double> pdtw::matrix_similarity(vtr3<double> const &input, parameter const return operation::similarityMatrix_omp(data, param, f).matrixSimilarity; } -///Searches similarity matrix for the most similarity pair of time series. +///Searches similarity matrix for the most similar pair of time series. ///@param[in] sm similarity matrix ///@param[in] scoreType score type (s1 ... sn) ///@param[in] scoreReversed if true: best similarity is away from zero (max) @@ -123,9 +123,9 @@ coordv pdtw::getBestSimilarity(vtr3<double> const &sm, int scoreType, bool score return close; } -///Updates similarity matrix after clustering occurs (merged time series/ clusters effect similarity matrix). +///Updates similarity matrix after clustering occurs (merges time series/clusters -> similarity matrix update).
///@param[in] sm similarity matrix -///@param[in] clusters TODO +///@param[in] clusters contains currently formed clusters ///@param[in] pair the most similar pair of time series/clusters ///@param[in] scoreType score type (s1 ... sn) ///@param[in] scoreReversed if true: best similarity is away from zero (max) diff --git a/SequenceComparison/preprocess.cpp b/SequenceComparison/preprocess.cpp index 3e5a66767df5269956e9bd006dd118952a2dd2e6..e1c3da579530559596178965a8f5cbfe10ff34a2 100644 --- a/SequenceComparison/preprocess.cpp +++ b/SequenceComparison/preprocess.cpp @@ -2,8 +2,8 @@ #include "preprocess.h" #include "calcul.h" -///Interpolates time series to same length. -///@param input input time series +///Interpolates time series to the same length. +///@param[in] input input time series template <typename T> void preprocess::interpolate(vtr3<T> &input) { @@ -59,8 +59,8 @@ void preprocess::interpolate(vtr3<T> &input) } template void preprocess::interpolate<double>(vtr3<double> &input); -///Interpolates time series to same length (fix for short time series). -///@param input input time series +///Interpolates time series to the same length (fix for short time series). +///@param[in] input input time series template <typename T> void preprocess::interpolate2(vtr3<double> &input) { @@ -108,9 +108,9 @@ void preprocess::interpolate2(vtr3<double> &input) } } -///Reduces length of input time series. -///@param input input time series -///@param ratio number by how many elements are time series averaged. +///Reduces lengths of time series (Piecewise Aggregate Approximation). 
+///@param[in] input input time series +///@param[in] ratio number how many times will be time series shorter template <typename T> void preprocess::paa(vtr3<T> &input, size_t ratio) { @@ -145,9 +145,9 @@ void preprocess::paa(vtr3<T> &input, size_t ratio) template void preprocess::paa<double>(vtr3<double> &input, size_t ratio); template void preprocess::paa<int>(vtr3<int> &input, size_t ratio); -///Alters input time series so they contains specific number of classes/levels. -///@param input input time series -///@param numClasses number of classes/levels +///Alters time series so they contains specific number of classes/levels (Symbolic aggregate approximation, for reducing time series length use -paa or -r). +///@param[in] input input time series +///@param[in] numClasses number of classes/levels void preprocess::sax(vtr3<double> &input, size_t numClasses) { double max = constant::MIN_double; @@ -188,9 +188,9 @@ void preprocess::sax(vtr3<double> &input, size_t numClasses) } } -///Reduces input time series by skipping elements. -///@param input input time series -///@param skip how many alignments will be skipped +///Reduces time series by skipping elements. +///@param[in] input input time series +///@param[in] skip how many elements will be skipped template <typename T> void preprocess::reduce(vtr3<T> &input, size_t skip) { @@ -207,9 +207,9 @@ void preprocess::reduce(vtr3<T> &input, size_t skip) template void preprocess::reduce<double>(vtr3<double> &input, size_t skip); template void preprocess::reduce<int>(vtr3<int> &input, size_t skip); -///Prolongs input time series by inserting average of neighbor elements. -///@param input input time series -///@param times how many times will be time series prolonged (ex: 1=len*2, 2=len*4, 3=len*8). +///Prolongs time series by inserting averages of neighbor elements. 
+///@param[in] input input time series +///@param[in] times how many times will be time series prolonged (ex: 1=len*2, 2=len*4, 3=len*8) template <typename T> void preprocess::prolong(vtr3<T> &input, size_t times) { @@ -247,9 +247,9 @@ void preprocess::prolong(vtr3<T> &input, size_t times) template void preprocess::prolong<double>(vtr3<double> &input, size_t times); template void preprocess::prolong<int>(vtr3<int> &input, size_t times); -///Smooths input time series by moving window. -///@param input input time series -///@param width size of moving window +///Smooths time series by moving window. +///@param[in] input input time series +///@param[in] width size of the moving window void preprocess::smooth(vtr3<double> &input, size_t width) { vtr3<double> output(input.size()); @@ -282,8 +282,8 @@ void preprocess::smooth(vtr3<double> &input, size_t width) input = output; } -///Normalize input time series by time series mean. -///@param input input time series +///Normalize time series by time series mean. +///@param[in] input input time series void preprocess::normalize(vtr3<double> &input) { for (size_t i = 0; i < input.size(); i++) //time series @@ -306,9 +306,9 @@ void preprocess::normalize(vtr3<double> &input) } } -///Normalize input time series by custom value. -///@param input input time series -///@param value value by which will be elements of time series divided +///Normalize time series by custom value. +///@param[in] input input time series +///@param[in] value value by which will be elements of the time series divided void preprocess::normalizeBy(vtr3<double> &input, double value) { for (size_t i = 0; i < input.size(); i++) @@ -324,8 +324,8 @@ void preprocess::normalizeBy(vtr3<double> &input, double value) } ///Normalize input time series by custom value. 
-///@param input input time series -///@param value value by which will be elements of time series divided +///@param[in] input input time series +///@param[in] value value by which will be elements of time series divided //vtr2<double> preprocess::normalize(vtr2<double> const &input, double coef) //{ // vtr2<double> output(input.size()); diff --git a/SequenceComparison/preprocess.h b/SequenceComparison/preprocess.h index 6cdaf3c8ef6337c2cd0471aaad9c328c9500d060..6dc5df6dc79f3a4746d1e45ef779ccb6d477f528 100644 --- a/SequenceComparison/preprocess.h +++ b/SequenceComparison/preprocess.h @@ -18,8 +18,8 @@ public: static void smooth(vtr3<double> &input, size_t width); static void normalize(vtr3<double> &input); - //static vtr2<double> normalize(vtr2<double> const &input, double coef); static void normalizeBy(vtr3<double> &input, double value); + //static vtr2<double> normalize(vtr2<double> const &input, double coef); }; #endif //PREPROCESS_H diff --git a/SequenceComparison/print.cpp b/SequenceComparison/print.cpp index 4b70a45f3a3b1a58218c70ab92a62e1333db3c56..b3ad9b1f232420386384fb967464728dd66b5160 100644 --- a/SequenceComparison/print.cpp +++ b/SequenceComparison/print.cpp @@ -1,17 +1,27 @@ #include "stdafx.h" #include "print.h" +#include <chrono> +#include <ctime> +#include <fstream> +#include <time.h> #include "parameter.h" #include "help.h" #include "cstruct.h" -#include <ctime> -#include <time.h> -#include <fstream> -#include <chrono> +#include "calcul.h" using namespace std; -///Prints program arguments. +///Prints input. +///@param[in] var string to be printed +template <typename T> +void print::printT(T const &var) +{ + cout << var; +} +template void print::printT<string>(string const &str); + +///Formats program arguments to a string. ///@param[in] args program arguments ///@return formated string string print::args(vtr<string> const &args) @@ -25,7 +35,7 @@ string print::args(vtr<string> const &args) return ss.str(); } -///Prints operation elapsed time. 
+///Formats elapsed time measurements during operation to a string. ///@param[in] time elapsed time measurements ///@return formated string string print::timeMeasures(result_time const &time) @@ -39,10 +49,10 @@ string print::timeMeasures(result_time const &time) return ss.str(); } -///Prints input time series. +///Formats time series to a string. ///@param[in] input time series to print ///@param[in] howMany number of input time series to print -///@param[in] isPair true if pair operation is invoked (only two input time series) +///@param[in] isPair if true: prints only first two time series (pair operation) ///@return formated string string print::input(vtr3<double> const &input, size_t howMany, bool isPair) { @@ -71,7 +81,7 @@ string print::input(vtr3<double> const &input, size_t howMany, bool isPair) return ss.str(); } -//Prints program arguments. +//Formats program arguments to a string. //@params[in] args program arguments //@return formated string //string print::tseries(vtr2<double> const &input) @@ -93,7 +103,7 @@ string print::input(vtr3<double> const &input, size_t howMany, bool isPair) // return ss.str(); //} -///Prints statistics about input time series. +///Formats calculated statistics about input time series to a string. 
///@param[in] input time series to print ///@param[in] howMany number of input time series to print ///@return formated string @@ -101,15 +111,9 @@ string print::inputStats(vtr3<double> const &input, size_t howMany = 20) { stringstream ss; - for (size_t i = 0; i < input.size(); i++) //list of sequences + for (size_t i = 0; i < std::min(howMany, input.size()); i++) { - double mean = 0; - ss << i + 1 << " "; - for (size_t j = 0; j < std::min(howMany, input[i].size()); j++) //standalone s - { - mean += input[i][j][0]; - } - ss << "length:\t" << input[i].size() << "\tmean: " << mean / input[i].size() << endl; + ss << "length: " << input[i].size() << "\tmean: " << calcul::vtr_mean(calcul::vtr_mean(input[i])) << endl; } return ss.str(); @@ -129,7 +133,7 @@ void print::write(string const &output, string path, bool append = false ) f.close(); } -///Prints distance matrix values to html format. +///Formats distance matrix values to html/string format. ///@param[in] m distance matrix ///@return formated string string print::html_matrix(vtr2<node> const &m) @@ -159,13 +163,13 @@ string print::html_matrix(vtr2<node> const &m) return ss.str(); } -///Prints cluster information to gdf file. +///Formats cluster information to gdf/string format. 
///@param[in] files input file names -///@param[in] input file names -///@param[in] similarity matrix -///@param[in] clusters ground truth +///@param[in] input set of time series +///@param[in] sm similarity matrix +///@param[in] clusters ground truth informations ///@return formated string -string print::gdf(vtr<string> const &files, vtr3<double> const &input, vtr2<double> const &similarity, input_groundTruth const &clusters) +string print::gdf(vtr<string> const &files, vtr3<double> const &input, vtr2<double> const &sm, input_groundTruth const &clusters) { stringstream ss; @@ -181,10 +185,6 @@ string print::gdf(vtr<string> const &files, vtr3<double> const &input, vtr2<doub ss << ",'" << colorString(clusters.getClusterID(i + 1)) << "',10,"; - //ss << ",'0,0,0'," << 10 << ","; /*input[i].size()*/; //color,width,meta1,meta2 - - //ss << "0,"; //meta 0 - ss << clusters.getClusterID(i + 1) << "|" << name << ","; //sequence; @@ -198,11 +198,11 @@ string print::gdf(vtr<string> const &files, vtr3<double> const &input, vtr2<doub ss << "edgedef>node1 VARCHAR, node2 VARCHAR, weight DOUBLE, directed BOOLEAN, color VARCHAR" << endl; - for (size_t i = 0; i < similarity.size(); i++) + for (size_t i = 0; i < sm.size(); i++) { - for (size_t j = 0; j < similarity[i].size(); j++) + for (size_t j = 0; j < sm[i].size(); j++) { - ss << i + 1 << "," << j + 1 << "," << similarity[i][j] << ",false"; + ss << i + 1 << "," << j + 1 << "," << sm[i][j] << ",false"; ss << "," << "'0,0,0'"/*help::GetColorString((int)i + 1)*/ << endl; } @@ -211,7 +211,7 @@ string print::gdf(vtr<string> const &files, vtr3<double> const &input, vtr2<doub return ss.str(); } -///Prints similarity matrix. +///Formats similarity matrix to a string. 
///@tparam matrix data type ///@param[in] m 2d matrix ///@param[in] params parameters @@ -239,8 +239,8 @@ std::string print::matrix(vtr2<T> const &m, parameter const &params) template string print::matrix(vtr2<int> const &simM, parameter const &params); template string print::matrix(vtr2<double> const &simM, parameter const &params); -///Prints pdtw clustering tree. In other words prints order in which were clustered time series clustered. -///@param[in] tree indexes of clustered time series in matrix (example: 0,1;2,5;0,2: 1. merged ts at 0,1 idxs 2. 2,5 3. merge of previously merged pair (merge result is saved in to lower index..so in step 3. are merged previously merged ts). +///Formats pdtw clustering tree to a string. Order in which were time series clustered. +///@param[in] tree indexes of clustered time series (example: 0,1;2,5;0,2: 1. merged ts at 0,1 idxs 2. 2,5 3. merge of previously merged pair (merge result is saved in to the lower index). ///@return formated string string print::tree(vtr<coordv> const &tree) { @@ -254,9 +254,9 @@ string print::tree(vtr<coordv> const &tree) return ss.str(); } -///Prints path shape in matrix. +///Formats path shape in matrix to a string. ///@param[in] path warping path -///@param[in] p coordinations of warping path points +///@param[in] p coordinations of warping path end (bottom right end, from where generation of warping path starts) ///@param[in] lenA length of time series A ///@param[in] lenB length of time series B ///@return formated string @@ -301,7 +301,7 @@ string print::warping(string const &path, coord p, size_t lenA, size_t lenB) return ss.str(); } -///Prints cluster matrix (generated in operation 3 and 4) in html format. +///Formats cluster matrix (generated in operation 2) to a html/string format.
///@param[in] input input time series ///@param[in] order matrix containing result clusters ///@param[in] clusters ground truth clusters @@ -334,8 +334,8 @@ string print::html_clusters(vtr3<double> const &input, vtr2<int> const &order, i return ss.str(); } -///Prints color -///@param[in] id index of color in array. +///Formats color to a string. +///@param[in] id index of color in an array ///@return formated string string print::colorString(size_t id) { @@ -351,8 +351,8 @@ string print::colorString(size_t id) return ss.str(); } -///Prints scores for operations 3 and 4. -///@param[in] result results from operation +///Formats scores of operations 2 to a string. +///@param[in] result operation results ///@param[in] precision floating point precision ///@return formated string string print::scores_clustering(result_operation const &result, int precision) @@ -386,7 +386,7 @@ string print::scores_clustering(result_operation const &result, int precision) return ss.str(); } -///Prints distance matrix (generated in dtw and lcss methods). +///Formats distance matrix (generated in dtw and lcss methods) to a string. ///@param[in] m distance matrix to be printed ///@return formated string string print::distanceMatrix(vtr2<node> const &m) @@ -406,7 +406,7 @@ string print::distanceMatrix(vtr2<node> const &m) return ss.str(); } -///Prints vector. +///Formats vector to a string. ///@tparam T data type of vector ///@param[in] vector vector to be printed ///@return formated string @@ -424,7 +424,7 @@ std::string print::vector(vtr<T> const &vector) template string print::vector(vtr<double> const &vector); template string print::vector(vtr<string> const &vector); -///Prints 2D vector. +///Formats 2D vector. 
///@tparam data type of vector ///@param[in] vector vector to be printed ///@return formated string diff --git a/SequenceComparison/print.h b/SequenceComparison/print.h index 2f53a24163b42688e58e427e4f8ebc22befcddb5..ab90f0d7f50bbaedee15aa1b336d6ce3124c93c5 100644 --- a/SequenceComparison/print.h +++ b/SequenceComparison/print.h @@ -8,7 +8,9 @@ ///Contains print and write functions. class print { -public: +public: + template <typename T> static void printT(T const &var); + static std::string args(vtr<std::string> const &args); static std::string timeMeasures(result_time const &time); @@ -26,7 +28,7 @@ public: template <typename T> static std::string vector(vtr<T> const &vector); //template <typename T> static std::string vector(vtr2<T> const &vector); - static std::string gdf(vtr<std::string> const &files, vtr3<double> const &input, vtr2<double> const &similarity, input_groundTruth const &clusters); + static std::string gdf(vtr<std::string> const &files, vtr3<double> const &input, vtr2<double> const &sm, input_groundTruth const &clusters); static std::string html_clusters(vtr3<double> const &input, vtr2<int> const &order, input_groundTruth const &clusters); static std::string html_matrix(vtr2<node> const &m); diff --git a/SequenceComparison/structs.h b/SequenceComparison/structs.h index 5854b9618ae7db2647e9bfdf775b56cada43fed4..cb7c22567cd5844efba26720acd0ffdb59d447cf 100644 --- a/SequenceComparison/structs.h +++ b/SequenceComparison/structs.h @@ -38,22 +38,22 @@ ///@details All distance matrices are build from these nodes. class node{ public: - double value; ///< distance matrix value + double value; ///< distance matrix node value ///default constructor - node() : value(constant::MAX_double/*std::numeric_limits<double>::max()*/) {} + node() : value(constant::MAX_double) {} ///initialization constructor ///@param[in] value initialization value node(double value) : value(value) {} }; -///Contains value of the distance matrix cell/node. 
EXPRIMENTAL +///Contains value of the distance matrix cell/node. EXPERIMENTAL ///@tparam T type in which will be distance matrix allocated in memory. template<class T> struct node2 { - T value; ///< distance matrix value + T value; ///< distance matrix node value int pathSize; ///< length of the warping path - vtr<bool> pa; ///< warping path + vtr<bool> path; ///< warping path //std::string path; ///default constructor @@ -196,7 +196,7 @@ struct node2 { // } //}; -///Contains elapsed time measurements during various operation phases. +///Contains elapsed time measurements during various application phases. struct result_time { long long parsing = 0; ///< time elapsed during data parsing phase (ms) @@ -205,7 +205,7 @@ struct result_time long long write = 0; ///< time elapsed during creation of log (ms) }; -///Contains coordination of node in matrix. +///Contains coordination of the node in matrix. struct coord { int row; ///< y coordination int col; ///< x coordination @@ -236,11 +236,11 @@ struct coord { ///Contains matrix coordinations and value united with these coordinations. struct coordv { - int row; ///< y coordination - int col; ///< x coordination + int row; ///< y coordination + int col; ///< x coordination double value; ///< value on this coordinations - ///default constructor + ///Default constructor coordv() : row(0), col(0), value(0) {} ///Initialization constructor for int type ///@param[in] row_ matrix row/y coordinations @@ -248,11 +248,11 @@ struct coordv { coordv(int row_, int col_) : row(row_), col(col_) {} }; -///Contains results of PDTW method. +///Contains results of the PDTW method. struct result_pdtw { double scoreNorm = 0; ///< final time series similarity score - vtr<coordv> tree; ///< Contains order in which are time series clustered. Cluster with higher index is unit with lower cluster (cluster with lower index is new unit cluster) + vtr<coordv> tree; ///< Contains order in which are time series clustered. 
Cluster with higher index is unit with lower cluster (cluster with lower index is new united cluster) vtr2<int> clusters; ///< contains formed clusters }; @@ -271,17 +271,17 @@ struct couple2 { couple2(T first_, T second_) : first(first_), second(second_) {} }; -///Contains warping path generated above accumulated distance matrix. +///Contains warping path generated above the accumulated distance matrix. struct result_path { std::string path; ///< generated warping path (M, U, L) vtr<coord> pathCoords; ///< coordinations of generated warping path vtr<double> values; ///< values of generated warping path - double scoreRaw = 0; ///< raw score (unmodified score from distance matrix) + double scoreRaw = 0; ///< raw score (unmodified final score from distance matrix, last cell) coord start; ///< start coordinations of the warping path in the distance matrix (upper left end of the warping path) - coord end; ///< end coordinations of the warping path in the distance matrix (bottom right end of the warping path) + coord end; ///< end coordinations of the warping path in the distance matrix (bottom right end of the warping path, start of the generation) //vtr<coord> convert_toCoords() const //{ @@ -306,7 +306,7 @@ struct result_path //} }; -///Contains results for dtw method. +///Contains results for the dtw method. struct result_dtw { vtr<double> score; ///< dtw score (s1 - s5) //vtr<coord> minims; ///< @@ -317,8 +317,8 @@ struct result_dtw { ///Contains range. 
struct range { - int start; ///< start (included) - int end; ///< end (included) + int start; ///< start (included) + int end; ///< end (included) ///Default constructor range() : start(0), end(0) {} @@ -330,7 +330,7 @@ struct range { ///@return range length size_t size() { - return end - start + 1; + return (end - start) + 1; } }; @@ -341,8 +341,9 @@ struct input_files { vtr<std::string> keyInput; ///< secondary input files paths vtr<std::string> keyQuery; ///< secondary query files paths - ///@return extracted file name from full path. + ///@param[in] idx position index of the file path + ///@return extracted file name from the full path. std::string get_inputName(size_t idx) const { size_t cut = input[idx].find_last_of("\\/") + 1; @@ -381,8 +382,8 @@ struct input_files { // return name; //} - ///Sorts paths of input files in order to assure that order of parsed time series are same for both Windows and Linux. - ///Depends on the theory that sort function works identically with Windows and Linux compilers. + ///Sorts paths of the input files in order to assure that order of the parsed time series is same for both Windows and Linux. + ///Depends on the theory that sort function works identically across Windows/Linux and different compilers. void sort() { std::sort(query.begin(), query.end()); @@ -395,16 +396,16 @@ struct input_files { ///Contains cluster ground truth informations for specific time series. 
struct clusterValue { - int idCluster = 0; ///< ground truth cluster id (index of the ground truth cluster into which concrete time series belongs) TODO: check - std::string seriesId = ""; ///< time series id which is substring of name (example: "A5090") + int idCluster = 0; ///< ground truth cluster id (index of the ground truth cluster into which specific time series belongs) + std::string seriesId = ""; ///< time series id which is substring of the name (example: "A5090") std::string seriesName = ""; ///< entire name of time series (example: "A5090-key-vectors-flat.txt") }; -///Contains clusters ground truth informations for analyzed time series (operation 3 and 4). +///Contains clusters ground truth informations for analyzed time series (operation 2). struct input_groundTruth { - std::map<int, clusterValue> ids; ///< matches time series id (inner) with ground truth informations - std::map<int, int> size; ///< matches time series id (inner) with the size of the cluster to which it belongs + std::map<int, clusterValue> ids; ///< contins pairs: time series id (inner id: id is sem as index in input vector after sorting) and ground truth cluster id + std::map<int, int> size; ///< contins pairs: series id (inner) and size of the cluster to which it belongs ///@return ground truth cluster id ///@param[in] idx index (inner) of the time series under which it can be found in the vector container (index position) @@ -414,14 +415,14 @@ struct input_groundTruth } ///@return size of the ground truth cluster - ///@param[in] idx index (inner) of the time series under which it can be found in vector container (index position) + ///@param[in] idx index (inner) of the time series under which it can be found in the vector container (index position) int getClusterSize(size_t idx) const { return size.at(ids.at((int)idx).idCluster); } ///@return string id (part of the time series name) - ///@param[in] idx index (inner) of the time series under which it can be found in vector 
container (index position) + ///@param[in] idx index (inner) of the time series under which it can be found in the vector container (index position) std::string getID_str(size_t idx) const { return ids.at((int)idx).seriesId; @@ -431,22 +432,22 @@ struct input_groundTruth ///Contains input data for operations accepting single variant of the input data (without query data). struct input_data_single { vtr3<double> input; ///< set of time series - vtr3<int> key; ///< secondary set of input time series used when distance type parameter is set to 4 (chord distance) - input_groundTruth clusters; ///< ground truth info + vtr3<int> key; ///< secondary set of input time series (-dist 4, chord distance) + input_groundTruth clusters; ///< ground truth inforomations input_files files; ///< file names for all 4 types of inputs }; -///Contains input data for operation with secondary inputs (with query data). +///Contains input data for the operation with secondary inputs (with query data). struct input_data { vtr3<double> input; ///< set of time series - vtr3<double> query; ///< set of time series which are used in as a query in operation 4 - vtr3<int> keyInput; ///< secondary set of input time series used when distance type parameter is set to 4 (chord distance) - vtr3<int> keyQuery; ///< secondary set of input query time series used by operation 4 and when distance type is set to 4 - input_groundTruth clusters; ///< ground truth info + vtr3<double> query; ///< set of time series (-op 2, -query path) + vtr3<int> keyInput; ///< secondary set of input time series (-dist 4, chord distance) + vtr3<int> keyQuery; ///< secondary set of input query time series (-op 2, -dist 4) + input_groundTruth clusters; ///< ground truth informations input_files files; ///< file names for all 4 types of inputs - ///Converts input data of type input_data to input_data_single type. 
- ///@return converted input data from secondary input to + ///Converts input data of the type input_data to the input_data_single type. + ///@return converted input data from secondary input to single variation input_data_single convert_inputDataSingle() const { input_data_single single; @@ -572,8 +573,8 @@ struct input_method_single_ref { ///Contains informations about input data (time series). struct input_info { - size_t idxA; ///< time series A index - size_t idxB; ///< time series B index + size_t idxA; ///< time series A index (inner id) + size_t idxB; ///< time series B index (inner id) std::string nameA; ///< time series A name std::string nameB; ///< time series B name @@ -587,8 +588,8 @@ struct input_info { idxB = idxB_; }; ///Initialization constructor for indexes and names. - ///@param[in] idxA_ time series index - ///@param[in] idxB_ time series index + ///@param[in] idxA_ time series index (inner id) + ///@param[in] idxB_ time series index (inner id) ///@param[in] nameA_ time series name ///@param[in] nameB_ time series name input_info(size_t idxA_, size_t idxB_, std::string nameA_, std::string nameB_) { @@ -642,8 +643,8 @@ struct color { ///Contains operation results. 
struct result_operation { - vtr3<double> matrixSimilarity; ///< similarity matrix output of operation 1 and others if used internally (pre sorted, not to confuse with distance matrix - vtr3<int> matrixCluster; ///< cluster matrix used for calculation of mean scores in operation 3 and 4 + vtr3<double> matrixSimilarity; ///< similarity matrix output (not to confuse with distance matrix) + vtr3<int> matrixCluster; ///< cluster matrix used for calculation of mean scores (-op 2) vtr2<double> scoreAveragePrecisions; ///< average precisions (row) vtr2<double> scoreAverageRanks; ///< average ranks (row) vtr2<double> scorePrecisions; ///< average precisions (row) @@ -655,8 +656,8 @@ struct result_operation result_dtw dtw; ///< result from dtw method result_time time; ///< time measurements - ///Initialize cluster matrix. - ///@param[in] depth number of score types (currently 5 (s1 - s5) for dtw and 3 for lcss) + ///Initializes cluster matrix. + ///@param[in] depth number of score types (currently 5 (s1 - s5) for dtw and 4 for lcss) void init_clusterMatrix(size_t depth) { matrixCluster = help::vtr_init<int>(matrixSimilarity.size(), matrixSimilarity[0].size(), matrixSimilarity[0][0].size()); diff --git a/SequenceComparison/veSegment.cpp b/SequenceComparison/veSegment.cpp index 4e5a00a386c73dac0a2317f248fa35c4e64041af..e83868934777dc1b7bfa68bdb9fe86e4d47a2474 100644 --- a/SequenceComparison/veSegment.cpp +++ b/SequenceComparison/veSegment.cpp @@ -2,10 +2,10 @@ #include "veSegment.h" #include "veTree.h" -///Builds ngram tree, votes, smooths votes and based on these votes segments input time series into meaningful sub time series. -///@param[in] input segmented time series +///Wraps building of ngram tree, experts voting, smoothing and time series segmenting. 
+///@param[in] input time series ///@param[in] win voting experts window/word size -///@param[in] smooth moving window size for voting experts votes +///@param[in] smooth moving window size for voting experts votes smoothing ///@return segmented time series vtr3<double> veSegment::getSegments(vtr2<double> const &input, int win, int smooth) { @@ -45,9 +45,9 @@ vtr3<double> veSegment::getSegments(vtr2<double> const &input, int win, int smoo } ///Smooths voting experts votes. -///@param[in] votes to be smoothed -///@param[in] win moving window size (window is symmetric, win: 1 = average from 3 cells) -///@return smoothed voting experts votes +///@param[in] votes voting experts votes to be smoothed +///@param[in] win moving window size (window is symmetric, win=1=average from 3 cells, win=2=average from 5 cells) +///@return smoothed voting experts votes0 vtr<int> veSegment::smoothVotes(vtr<int> votes, int win) { vtr<int> smoothed(votes.size()); @@ -64,12 +64,11 @@ vtr<int> veSegment::smoothVotes(vtr<int> votes, int win) return smoothed; } -///Segments time series bases on input votes. -///@param[in] tseries time series to be segmented +///Segments time series based on the voting experts votes. 
+///@param[in] series time series to be segmented ///@param[in] votes voting experts votes -//@param[in] win voting experts voting window size -///@return Segmented time series -vtr3<double> veSegment::getSegmenetedSerie(vtr2<double> const &tseries, vtr<int> const &votes) +///@return segmented time series +vtr3<double> veSegment::getSegmenetedSerie(vtr2<double> const &series, vtr<int> const &votes) { vtr3<double> segments; vtr<int> breaksIdx; @@ -82,18 +81,18 @@ vtr3<double> veSegment::getSegmenetedSerie(vtr2<double> const &tseries, vtr<int> breaksIdx.push_back((int)i); if (breaksIdx.size() == 0) { - segments.push_back(tseries); + segments.push_back(series); return segments; } - vtr2<double> tmp(tseries.begin(), tseries.begin() + breaksIdx[0] + 1); + vtr2<double> tmp(series.begin(), series.begin() + breaksIdx[0] + 1); segments.push_back(tmp); for (size_t i = 0; i < breaksIdx.size() - 1; i++) { - tmp = vtr2<double>(tseries.begin() + breaksIdx[i] + 1, tseries.begin() + breaksIdx[i + 1] + 1); + tmp = vtr2<double>(series.begin() + breaksIdx[i] + 1, series.begin() + breaksIdx[i + 1] + 1); segments.push_back(tmp); } - tmp = vtr2<double>(tseries.begin() + breaksIdx[breaksIdx.size() - 1] + 1, tseries.end()); + tmp = vtr2<double>(series.begin() + breaksIdx[breaksIdx.size() - 1] + 1, series.end()); segments.push_back(tmp); return segments; diff --git a/SequenceComparison/veSegment.h b/SequenceComparison/veSegment.h index a475e9ae8ed701ae82ef2b24e3c0b7030db31a37..ac9addc6a2401d97cc340282ad35c211f560be6a 100644 --- a/SequenceComparison/veSegment.h +++ b/SequenceComparison/veSegment.h @@ -8,7 +8,7 @@ class veSegment { public: static vtr3<double> getSegments(vtr2<double> const &input, int win, int smooth); - static vtr3<double> getSegmenetedSerie(vtr2<double> const &tseries, vtr<int> const &votes); + static vtr3<double> getSegmenetedSerie(vtr2<double> const &series, vtr<int> const &votes); static vtr<int> smoothVotes(vtr<int> votes, int win); }; diff --git 
a/SequenceComparison/veTree.cpp b/SequenceComparison/veTree.cpp index 056d234c15e97d788b33e9c0b45076989ebccde7..7e74678cca0fa36b341b86144cef122522612be1 100644 --- a/SequenceComparison/veTree.cpp +++ b/SequenceComparison/veTree.cpp @@ -2,14 +2,14 @@ #include "veTree.h" #include "calcul.h" -///initialization constructor -///@param[in] tseries time series from which ngram tree is built. -///@param[in] depth sets how deep (number of layers) is tree. Deepness is determined by how long words (sub time series) are inserted into the tree). len(word) = depth -veTree::veTree(vtr2<double> const &tseries, int depth) : children(), stats(), value(), frequency(0), entropy(0) +///Initialization constructor. +///@param[in] series time series from which ngram tree is built +///@param[in] depth sets deepnes of the ngram tree (number of layers). Deepness is determined by length of the sub time series (word length = depth) +veTree::veTree(vtr2<double> const &series, int depth) : children(), stats(), value(), frequency(0), entropy(0) { - for (size_t i = 0; i < tseries.size() - (depth - 1); i++) + for (size_t i = 0; i < series.size() - (depth - 1); i++) { - vtr2<double> word(tseries.begin() + i, tseries.begin() + i + depth); + vtr2<double> word(series.begin() + i, series.begin() + i + depth); if (word.empty()) return; @@ -27,8 +27,8 @@ veTree::veTree(vtr2<double> const &tseries, int depth) : children(), stats(), va } ///Inserts word into the tree. 
-///@param[in] word inserted word (sub sequence of input time series) in to the tree -///@param[in] pos position of word point which will be saved in currently processed tree node +///@param[in] word inserted word (sub sequence of the input time series) in to the tree +///@param[in] pos position of the word element which will be saved in currently processed tree node ///@return tree node veTree veTree::push(vtr2<double> const &word, size_t pos) { @@ -45,8 +45,8 @@ veTree veTree::push(vtr2<double> const &word, size_t pos) } ///Increments already existing nodes. -///@param[in] word inserted word (sub sequence of input time series) in to the tree -///@param[in] pos position of word point which will be saved in currently processed tree node +///@param[in] word inserted word (sub sequence of input time series) into the tree +///@param[in] pos position of the word element which will be saved in currently processed tree node ///@return tree node void veTree::increment(vtr2<double> const &word, size_t pos) { @@ -62,8 +62,8 @@ void veTree::increment(vtr2<double> const &word, size_t pos) children[idx].increment(word, ++pos); } -///Searches all children for position of searched time series point. -///@return if found: position index of time series point. else: returns -1. +///Searches children for the input time series element. +///@return if found: index of child contaning time series point else: returns -1 int veTree::find(vtr<double> const &point) const { for (size_t i = 0; i < children.size(); i++) @@ -75,7 +75,7 @@ int veTree::find(vtr<double> const &point) const return -1; } -///Calculates entropy for entire tree. +///Calculates entropy for the entire tree. 
void veTree::obtainEntropy() { std::queue<veTree*> q; @@ -101,85 +101,8 @@ void veTree::obtainEntropy() } } -///Standardize ngram tree stats (frequency, entropy) in tree by: z = (sample - mean(sample)) / std(sample) -//void veTree::standardizeStats() -//{ -// std::queue<veTree*> q; -// q.push(this); -// -// int level = 0; -// int count = (int)this->childs.size(); -// int subLevelCount = 0; -// while (!q.empty()) -// { -// veTree *node = q.front(); -// q.pop(); -// -// if (count == 0) -// { -// count = subLevelCount; -// subLevelCount = 0; -// level++; -// } -// -// node->frequency = (node->frequency - this->stats[level].fMean) / this->stats[level].fStd; -// node->entropy = (node->entropy - this->stats[level].eMean) / this->stats[level].eStd; -// -// for (size_t i = 0; i < node->childs.size(); i++) { -// if (node->children[i].childs.size() != 0) { -// q.push(&node->children[i]); -// subLevelCount += (int)node->children[i].childs.size(); -// } -// count--; -// } -// } -//} -//void veTree::standardizeStats() -//{ -// std::queue<veTree*> q; -// q.push(this); -// -// vtr<double> lFrec; -// vtr<double> lEntr; -// -// while (!q.empty()) -// { -// veTree *node = q.front(); -// q.pop(); -// -// lFrec.push_back(node -> frequency); -// lEntr.push_back(node -> entropy); -// -// for (size_t i = 0; i < node->childs.size(); i++) { -// if (node->children[i].childs.size() != 0) { -// q.push(&node->children[i]); -// } -// } -// } -// -// double fMean = calcul::vtr_mean(lFrec); -// double eMean = calcul::vtr_mean(lEntr); -// double fStd = calcul::vtr_std(lFrec); -// double eStd = calcul::vtr_std(lEntr); -// -// while (!q.empty()) -// { -// veTree *node = q.front(); -// q.pop(); -// -// node->frequency = (node->frequency - fMean) / fStd; -// node->entropy = (node->entropy - eMean) / eStd; -// -// for (size_t i = 0; i < node->childs.size(); i++) { -// if (node->children[i].childs.size() != 0) { -// q.push(&node->children[i]); -// } -// } -// } -//} - -///traverse in level order g 
through ngram tree. -///@return vector of voting experts stats by level order traversing (line by line and by that order). +///Calculates ngram tree statistics by level order traversing (frequency: mean and std, entropy: mean and std). +///@return ngram tree statistics vtr<ve_LevelStats> veTree::getStats() { std::queue<veTree*> q; @@ -227,19 +150,19 @@ vtr<ve_LevelStats> veTree::getStats() return result; } -///Voting experts votes. -///@param[in] tseries voted time series +///Calculates voting experts votes. +///@param[in] series voted time series ///@param[in] winSize voting expert window size ///@return voting experts votes -vtr<int> veTree::getVotes(vtr2<double> const &tseries, int winSize) +vtr<int> veTree::getVotes(vtr2<double> const &series, int winSize) { - vtr<ve_nodeStats> votes(tseries.size() - 1); + vtr<ve_nodeStats> votes(series.size() - 1); - for (size_t i = 0; i < tseries.size() - (winSize - 1); i++) //calculates node stats for word nodes (for one node) - votes[i] = this->getNodeStats(tseries, this->stats, winSize, (int)i, 0); //word: ts sub of window size + for (size_t i = 0; i < series.size() - (winSize - 1); i++) //calculates node stats for word nodes (for one node) + votes[i] = this->getNodeStats(series, this->stats, winSize, (int)i, 0); //word: ts sub of window size - vtr<int> voteResult(tseries.size() - 1); - for (size_t i = 0; i < tseries.size() - (winSize); i++) + vtr<int> voteResult(series.size() - 1); + for (size_t i = 0; i < series.size() - (winSize); i++) { voteResult[i + votes[i].idxF]++; voteResult[i + votes[i].idxE]++; @@ -251,17 +174,17 @@ vtr<int> veTree::getVotes(vtr2<double> const &tseries, int winSize) return voteResult; } -///Calculates node stat. -///@param[in] tseries time series -///@param[in] stats ngram tree stats in level order traversing order +///Calculates node stats. 
+///@param[in] series time series +///@param[in] stats ngram tree stats ///@param[in] winSize voting experts window size ///@return voting experts node stats -ve_nodeStats veTree::getNodeStats(vtr2<double> const &tseries, vtr<ve_LevelStats> const &stat, int winSize, int pos, int depth) +ve_nodeStats veTree::getNodeStats(vtr2<double> const &series, vtr<ve_LevelStats> const &stat, int winSize, int pos, int depth) { if (frequency == 0) { - int idx = find(tseries[pos]); + int idx = find(series[pos]); if (idx != -1) { - auto tmp = children[idx].getNodeStats(tseries, stat, winSize, pos + 1, depth + 1); + auto tmp = children[idx].getNodeStats(series, stat, winSize, pos + 1, depth + 1); return tmp; } } @@ -275,10 +198,10 @@ ve_nodeStats veTree::getNodeStats(vtr2<double> const &tseries, vtr<ve_LevelStats if (winSize - depth > 0) { ve_nodeStats innerVote; - int idx = find(tseries[pos]); + int idx = find(series[pos]); if (idx != -1) { - innerVote = children[idx].getNodeStats(tseries, stat, winSize, pos + 1, depth + 1); + innerVote = children[idx].getNodeStats(series, stat, winSize, pos + 1, depth + 1); if (innerVote.frequency >= vote.frequency) { vote.frequency = innerVote.frequency; @@ -295,12 +218,87 @@ ve_nodeStats veTree::getNodeStats(vtr2<double> const &tseries, vtr<ve_LevelStats return vote; } - - ///@param[in] time series ///@param[in] voting experts window size ///@return votes for input time series //vtr<int> veTree::getVotes(vtr2<double> const &tseries, int win) //{ // return vote(tseries, stats, win); +//} + +///Standardize ngram tree stats (frequency, entropy) in tree by: z = (sample - mean(sample)) / std(sample) +//void veTree::standardizeStats() +//{ +// std::queue<veTree*> q; +// q.push(this); +// +// int level = 0; +// int count = (int)this->childs.size(); +// int subLevelCount = 0; +// while (!q.empty()) +// { +// veTree *node = q.front(); +// q.pop(); +// +// if (count == 0) +// { +// count = subLevelCount; +// subLevelCount = 0; +// level++; +// } +// +// 
node->frequency = (node->frequency - this->stats[level].fMean) / this->stats[level].fStd; +// node->entropy = (node->entropy - this->stats[level].eMean) / this->stats[level].eStd; +// +// for (size_t i = 0; i < node->childs.size(); i++) { +// if (node->children[i].childs.size() != 0) { +// q.push(&node->children[i]); +// subLevelCount += (int)node->children[i].childs.size(); +// } +// count--; +// } +// } +//} +//void veTree::standardizeStats() +//{ +// std::queue<veTree*> q; +// q.push(this); +// +// vtr<double> lFrec; +// vtr<double> lEntr; +// +// while (!q.empty()) +// { +// veTree *node = q.front(); +// q.pop(); +// +// lFrec.push_back(node -> frequency); +// lEntr.push_back(node -> entropy); +// +// for (size_t i = 0; i < node->childs.size(); i++) { +// if (node->children[i].childs.size() != 0) { +// q.push(&node->children[i]); +// } +// } +// } +// +// double fMean = calcul::vtr_mean(lFrec); +// double eMean = calcul::vtr_mean(lEntr); +// double fStd = calcul::vtr_std(lFrec); +// double eStd = calcul::vtr_std(lEntr); +// +// while (!q.empty()) +// { +// veTree *node = q.front(); +// q.pop(); +// +// node->frequency = (node->frequency - fMean) / fStd; +// node->entropy = (node->entropy - eMean) / eStd; +// +// for (size_t i = 0; i < node->childs.size(); i++) { +// if (node->children[i].childs.size() != 0) { +// q.push(&node->children[i]); +// } +// } +// } //} \ No newline at end of file diff --git a/SequenceComparison/veTree.h b/SequenceComparison/veTree.h index bbba1698e4412ce2ea0683d1de36255b28bcecf7..df67225fd8dc669f5b71f42c27873edabf71cb03 100644 --- a/SequenceComparison/veTree.h +++ b/SequenceComparison/veTree.h @@ -5,7 +5,7 @@ #include "templates.h" #include "calcul.h" -///Contains votes for node in the ngram tree (Kocyan). +///Contains votes for node in the ngram tree. 
struct ve_nodeStats { double frequency; ///< frequency of occurrence of current node double entropy; ///< entropy of current node @@ -34,7 +34,7 @@ struct ve_nodeStats { } }; -///Contains calculated stat from votes for node in the ngram tree (Kocyan). +///Contains node statistics. struct ve_LevelStats { double fMean; ///< mean frequency double fStd; ///< frequency standard deviation @@ -45,36 +45,32 @@ struct ve_LevelStats { ve_LevelStats() : fMean(0), fStd(0), eMean(0), eStd(0) {}; }; -///Contains ngram tree for voting experts algorithm (Kocyan). +///Building block of the ngram tree for voting experts algorithm. class veTree { private: vtr<veTree> children; ///< child of current node vtr<ve_LevelStats> stats; ///< node mean stats calculated from entire tree for node - vtr<double> value; ///< node value (word point) - double frequency; ///< node occurrence frequency - double entropy; ///< node entropy + vtr<double> value; ///< node value (word point) + double frequency; ///< node occurrence frequency + double entropy; ///< node entropy public: ///default constructor veTree() : children(), stats(), value(), frequency(0), entropy(0) {} ///initialization constructor - veTree(vtr2<double> const &tseries, int depth); + veTree(vtr2<double> const &series, int depth); - vtr<int> getVotes(vtr2<double> const &tseries, int win); + vtr<int> getVotes(vtr2<double> const &series, int win); private: int find(vtr<double> const &point) const; veTree push(vtr2<double> const &word, size_t pos); void increment(vtr2<double> const &word, size_t pos); - ve_nodeStats getNodeStats(vtr2<double> const &tseries, vtr<ve_LevelStats> const &stat, int winSize, int pos, int depth); + ve_nodeStats getNodeStats(vtr2<double> const &series, vtr<ve_LevelStats> const &stat, int winSize, int pos, int depth); vtr<ve_LevelStats> getStats(); - //vtr<int> vote(vtr2<double> const &tseries, vtr<ve_stats> const &stat, int winSize); void obtainEntropy(); - - //double getMean - //void standardizeStats(); }; #endif 
//VETREE_H diff --git a/unit/ut_dtw.cpp b/unit/ut_dtw.cpp index 1f8ad9bed18fa435b40ec3408f2a346f4f70a389..864d0d927847b7e2c322235b1387ecde59bb3c1b 100644 --- a/unit/ut_dtw.cpp +++ b/unit/ut_dtw.cpp @@ -128,7 +128,7 @@ TEST_CASE("DTW: warping equality") B[10][0] = 4; parameter params; - sdistance f_distance(1); + distancet f_distance(1); input_method input(A, B); input_info info(0, 1); @@ -307,7 +307,7 @@ TEST_CASE("DTW: no accumulation, diagonal zero check") input_method input(A, B); input_info info(0, 1); parameter p; - sdistance d(1); + distancet d(1); p.scoreType = 1; m = dtw::matrix_noaccumulation(input, d, p);