calcul.cpp

#include "stdafx.h"

#include "calcul.h"
#include <numeric>
#include <deque>
#include <bitset>
#include "cstruct.h"

using namespace std;

///Calculates distance between two vectors.
///@param[in] u element of the compared time series (n dimensional vector)
///@param[in] v element of the compared time series (n dimensional vector)
///@return Euclidean distance
double calcul::distance_dtw_euklid(vtr<double> const &u, vtr<double> const &v)
{
	double sum = 0;

	for (size_t i = 0; i < u.size(); i++)
	{		
		sum += pow(u[i] - v[i], 2);
	}

	return sum;
}

///Calculates distance between two vectors.
///@param[in] u element of the compared time series (n dimensional vector)
///@param[in] v element of the compared time series (n dimensional vector)
///@return Euclidean mean distance
//double calcul::distance_dtw_euklid_mean(vtr<double> const &u, vtr<double> const &v)
//{
//	double sum = 0;
//	for (size_t i = 0; i < u.size(); i++)
//	{
//		double ratio = 0;
//
//		if (u[i] > v[i])
//			ratio += 1 - v[i] / u[i];
//		else if (v[i] > u[i])
//			sum += 1 - u[i] / v[i];
//
//		sum += std::abs(1 - u[i] / (v[i] + 0.00000001));	
//	}
//
//	return sum;
//}

///Calculates distance between two vectors.
///@param[in] u element of the compared time series (n dimensional vector)
///@param[in] v element of the compared time series (n dimensional vector)
///@return Manhattan distance
double calcul::distance_dtw_manhattan(vtr<double> const &u, vtr<double> const &v)
{
	double sum = 0;

	for (size_t i = 0; i < u.size(); i++)
	{
		sum += abs(u[i] - v[i]);
	}

	return sum;
}

///Calculates chroma distance between two 12d vectors (specific data music data format needed).
///@param[in] u element of the compared time series (12d vector)
///@param[in] v element of the compared time series (12d vector)
///@param[in] threshold used for filtering insignificant tones in the input vectors
///@return Euclidean distance
double calcul::distance_dtw_csiChroma(vtr<double> const &u, vtr<double> const &v, double threshold /*def value in header*/)
{
	vtr<int> u_filter(u.size());
	vtr<int> v_filter(v.size());

	//binary filtering
	for (size_t i = 0; i < u.size(); i++)
	{
		if (u[i] > threshold)
			u_filter[i] = 1;

		if (v[i] > threshold)
			v_filter[i] = 1;
	}

	auto getRoot = [&scale = cstruct::scaleChord](vtr<double> const &vec) {
		int idx = 0;
		int min = constant::MAX_int;
		for (int i = 0; i < (int)scale.size(); i++) {
			int sum = 0;
			for (size_t j = 0; j < vec.size(); j++)
				sum += (int)std::abs(vec[j] - scale[i][j]);			//max value == 1 -> pow 2 -> abs

			if (sum < min) {
				min = sum;
				idx = i;
			}
		}
		return idx;
	};

	//search minimal distance between filtered vectors and minor/major scale
	//for (int i = 0; i < (int)scaleChord.size(); i++)
	//{
	//	int sumU = 0;
	//	int sumV = 0;
	//	for (size_t j = 0; j < u.size(); j++)
	//	{
	//		sumU += std::abs(u_filter[j] - scaleChord[i][j]);
	//		sumV += std::abs(v_filter[j] - scaleChord[i][j]);
	//		//max value == 1 -> pow 2 -> abs
	//	}

	//	if (sumU < minU)
	//	{
	//		minU = sumU;
	//		idxU = i;
	//	}

	//	if (sumV < minV)
	//	{
	//		minV = sumV;
	//		idxV = i;
	//	}
	//}

	auto idxU = getRoot(u);
	auto idxV = getRoot(v);

	//calculate result (zero u/v if minimal scale vectors == 1)
	double result = 0;
	for (size_t i = 0; i < u.size(); i++)
	{
		double tmpU;
		if (cstruct::scaleChord[idxU][i] == 1)
			tmpU = 0;
		else
			tmpU = u[i];

		double tmpV;
		if (cstruct::scaleChord[idxV][i] == 1)
			tmpV = 0;
		else
			tmpV = v[i];

		//result += std::abs(tmpU - tmpV);
		result += std::pow(tmpU - tmpV, 2);
	}

	return result;
}

///Calculates chord distance between two pairs of two 12d vectors.
///@param[in] u element of the compared time series (12d vector)
///@param[in] v element of the compared time series (12d vector)
///@param[in] uKey element of the compared key time series (12d vector)
///@param[in] vKey element of the compared key time series (12d vector)
///@return chord distance
double calcul::distance_dtw_csiChord(vtr<double> const  &u, vtr<double> const &v, vtr<int> const &uKey, vtr<int> const &vKey)
{
	auto getRoot = [](vtr<double> const &chord) {
		auto getShift = [](int idx) { return idx > 11 ? idx % 12 : idx; };
		for (int i = 0; i < (int)chord.size(); i++) {
			if (chord[i]) {
				if (chord[getShift(i + 7)]) {
					if (chord[getShift(i + 3)])
						return i;
					if (chord[getShift(i + 4)])
						return i;
				}
			}
		}
		return -1;
	};

	auto getIdxKey = [&scale = cstruct::scaleKey](vtr<int> const &vec) {
		int idx = 0;
		int min = constant::MAX_int;
		for (int i = 0; i < (int)scale.size(); i++) {
			int sum = 0;
			for (size_t j = 0; j < vec.size(); j++)
				sum += (int)std::abs(vec[j] - scale[i][j]);			//max value == 1 -> pow 2 -> abs

			if (sum < min) {
				min = sum;
				idx = i;
			}
		}
		return idx;
	};

	int idxU = getRoot(u);
	int idxV = getRoot(v);
	int idxUK = getIdxKey(uKey);
	int idxVK = getIdxKey(vKey);

	/*int shiftU = (idxU > 11) ? 3 : 4;
	if (shiftU == 3) idxU -= 12;

	int shiftV = (idxV > 11) ? 3 : 4;
	if (shiftV == 3) idxV -= 12;*/

	auto getPitch = [](vtr<double> const &vec, vtr<int> const &key, int idx) {
		vtr<bool> pitch(48);					//pitch space of 4 levels (level e is irrelevant)
		pitch[idx] = 1;							//level a root

		pitch[idx + 12] = 1;					//level b root (copy)
		pitch[12 + (idx + 7) % 12] = 1;			//level b 2.

		for (size_t i = 0; i < 12; i++) 		//add key to 4. level (level d) pitch spaces (entire key vector) 
			pitch[i + 24] = vec[i];

		for (size_t i = 0; i < 12; i++)			//copy 3. level to 4. 
			pitch[i + 36] = vec[i];

		for (size_t i = 0; i < 12; i++) {		//add key to 4. level (level d) pitch spaces (entire key vector) 
			if (key[i] == 1)
				pitch[i + 36] = key[i];
		}
		return pitch;
	};

	auto pitchU = getPitch(u, uKey, idxU/*, shiftU*/);			//pitch space of 4 levels (level e is irrelevant)
	auto pitchV = getPitch(v, vKey, idxV/*, shiftV*/);

	auto getCofDistance = [](int idx1, int idx2) {
		return cstruct::cofDistance.at(std::abs(idx1 - idx2));
	};

	auto dist1 = getCofDistance(idxU, idxV);		//distance between chord roots
	auto dist2 = getCofDistance(idxUK, idxVK);

	int dist3 = 0;									//calculation of pitch space distance
	for (size_t i = 0; i < 48; i++)
	{
		if (pitchU[i] != pitchV[i])
			dist3++;
	}

	/*int count = 0;
	for (size_t i = 0; i < pitchU.size() / 12; i++) {
		for (size_t j = 0; j < 12; j++)
		{
			if (pitchU[count++])
				cout << "1";
			else
				cout << "0";
		}
		cout << endl;
	}

	count = 0;
	for (size_t i = 0; i < pitchU.size() / 12; i++) {
		for (size_t j = 0; j < 12; j++)
		{
			if (pitchV[count++])
				cout << "1";
			else
				cout << "0";
		}
		cout << endl;
	}*/

	return dist1 + dist2 + dist3;
}

//Calculates sum of Euclidean distance between all pairs.
//@param[in] series time series
//@return sum of dtw distances
//double calcul::distance_dtw_many(vtr2<double> const &series)
//{
//	double sum = 0;
//
//	for (size_t i = 0; i < series.size(); i++)
//	{
//		for (size_t j = i + 1; j < series.size(); j++)
//		{
//			sum += distance_dtw_euklid(series[i], series[j]);
//		}
//	}
//
//	return sum;
//}

///Calculates lcss distance between two elements of the compared time series.
///@param[in] u element of the compared time series (n dimensional vector)
///@param[in] v element of the compared time series (n dimensional vector)
///@param[in] idx index
///@return lcss distance
double calcul::distance_lcss(vtr<double> const &u, vtr<double> const &v, int idx)
{
	if (u.size() == 0 || v.size() == 0)
		return 0;

	double sum = abs(u[idx] - v[idx]);

	return sum;
}

///Calculates dtw raw score s1 (last cell in the distance matrix).
///@param[in] scoreRaw raw score
///@return dtw raw score s1
//double calcul::score_dtw_s1(double scoreRaw)
//{
//	return scoreRaw;
//}

///Calculates dtw score s2 (s1 divided by warping path length)
///@param[in] scoreRaw raw score (last cell in the distance matrix)
///@param[in] pathLength length of the warping path
///@return dtw score s2
double calcul::score_dtw_s2(double scoreRaw, size_t pathLength)
{
	return sqrt(scoreRaw) / static_cast<double>(pathLength);
}

///Calculates dtw score s3.
///@param[in] lenA length of the time series A
///@param[in] lenB length of the time series B
///@param[in] pathLength length of the warping path
///@return dtw score s3
double calcul::score_dtw_s3(size_t lenA, size_t lenB, size_t pathLength)
{
	return 1 - (lenA / static_cast<double>(pathLength) + lenB / static_cast<double>(pathLength)) / 2;
}

///Calculates dtw score s4.
///@param[in] scoreRaw raw score (last cell in the distance matrix)
///@param[in] scoreRawMax maximal dtw score
///@return dtw score s4
double calcul::score_dtw_s4(double scoreRaw, double scoreRawMax)
{
	return (sqrt(scoreRaw) / sqrt(scoreRawMax));
}

///Calculates dtw score s5.
///@param[in] scoreRaw raw score (last cell in distance matrix)
///@param[in] scoreRawMax maximal dtw score
///@param[in] coefficient ratio of the compared time series lengths
///@return dtw score s5
double calcul::score_dtw_s5(double scoreRaw, double scoreRawMax, double coefficient)
{
	return (sqrt(scoreRaw) / sqrt(scoreRawMax)) * coefficient;
}

///Calculates maximal dtw score.
///@param[in] A time series A
///@param[in] B time series B
///@param[in] start coordinations of the warping path start (left upper end).
///@param[in] end coordinations of the warping path end (bottom right end)
///@return maximal raw dtw score
double calcul::score_dtw_max(vtr2<double> const &A, vtr2<double> const &B, coord start, coord end)
{
	double min = constant::MAX_double;
	double max = constant::MIN_double;
		
	size_t sA = 0;
	size_t eA = (int)A.size();
	size_t sB = 0;
	size_t eB = (int)B.size();
 
	if (A.size() < B.size()) {
		sB = start.col;
		eB = end.col;
	}

	if (B.size() < A.size()) {
		sA = start.row;
		eA = end.row;
	}

	for (size_t i = sA; i < eA; i++)
	{
		double sum = 0;
		for (size_t j = 0; j < A[i].size(); j++)	
		{
			sum += A[i][j];
		}

		if (min > sum)	
			min = sum;

		if (max < sum)		//added
			max = sum;
	}
	
	for (size_t i = sB; i < eB; i++)
	{
		double sum = 0;
		for (size_t j = 0; j < B[i].size(); j++)
		{
			sum += B[i][j];
		}

		if (min > sum)		//added
			min = sum;

		if (max < sum)
			max = sum;
	}

	return pow(max - min, 2) * std::max(eA - sA, eB - sB);
}

///Calculates input time series length ratio.
///@param[in] lenA length of the time series A
///@param[in] lenB length of the time series B
///@return length ratio
double calcul::lenRatio(size_t lenA, size_t lenB)
{
	if (lenA < lenB)
		return (double)lenA / lenB;
	else if (lenB < lenA)
		return (double)lenB / lenA;
	else
		return 1;
}

///Calculates ratio.
///@param[in] dividend
///@param[in] divisor
///@return ratio
double calcul::ratio(size_t dividend, size_t divisor)
{
	return dividend / (double)divisor;
}

///Calculates lcss score s1.
///@param[in] scoreRaw raw score (last cell in the distance matrix)
///@param[in] pathLength length of the warping path
///@return lcss score s1
double calcul::score_lcss_s1(double scoreRaw, size_t pathLength)
{
	return 1 - scoreRaw / static_cast<double>(pathLength);
}

///Calculates lcss score s2.
///@param[in] scoreRaw raw score (last cell in the distance matrix)
///@param[in] maxABLen length of the longer input time series
///@return lcss score s2
double calcul::score_lcss_s2(double scoreRaw, size_t maxABLen)
{
	return 1 - scoreRaw / maxABLen;
}

///Calculates lcss score s3.
///@param[in] scoreRaw raw score (last cell in the distance matrix)
///@return lcss score s3
double calcul::score_lcss_s3(double scoreRaw)
{
	return scoreRaw;
}


//double calcul::score_multi_dtw(vtr3<double> const &input, vtr3<double> const &output)
//{
//	size_t sumA = 0;
//	for (auto s : input)
//		sumA += s.size();
//
//	size_t sumB = 0;
//	for (auto s : output)
//		sumB += s.size();
//
//	return sumA / static_cast<double>(sumB);
//}

///Calculates average rank.
///@param[in] ref cluster id reference
///@param[in] queryAnswer query answer row (from id matrix (int))
///@param[in] clusters ground truth of the clusters
///@return average rank
double calcul::score_averageRank(int ref, vtr<int> const &queryAnswer, input_groundTruth const &clusters)
{
	int clusterSize = clusters.getClusterSize(ref);

	int counter = 0;
	double averageRank = 0;
	for (size_t i = 0; i < queryAnswer.size(); i++)         //query (ref sequence) //column
	{
		if (clusters.getClusterID(queryAnswer[i]) == clusters.getClusterID(ref))
		{
			averageRank += (int)i + 1;
			counter++;
			if (counter == clusterSize - 1)
				break;
		}
	}

	averageRank = averageRank / (clusterSize - 1);
	return averageRank;
}

///Calculates average precision.
///@param[in] ref cluster id reference
///@param[in] queryAnswer query answer row (from id matrix (int))
///@param[in] clusters ground truth of the clusters
///@return average precision
double calcul::score_averagePrecision(int ref, vtr<int> const &queryAnswer, input_groundTruth const &clusters)
{	
	int clusterSize = clusters.getClusterSize(ref);
		
	double pr = 0;
	int counter = 0;
	for (size_t i = 0; i < queryAnswer.size(); i++)         //query (ref sequence) //column
	{
		if (clusters.getClusterID(queryAnswer[i]) == clusters.getClusterID(ref))
		{
			pr += ++counter / (double)(i + 1);
			if (counter == clusterSize - 1)
				break;
		}
	}

	double ap = pr / (clusterSize - 1);
	return ap;
}

///Calculates map score (mean average precision).
///@param[in] averagePrecisions vector of average precisions
///@return map score
double calcul::score_map(vtr<double> const &averagePrecisions)
{	
	return accumulate(averagePrecisions.begin(), averagePrecisions.end(), 0.0) / averagePrecisions.size();
}

///Calculates precision.
///@param[in] ref cluster id reference
///@param[in] queryAnswer query answer row (from id matrix (int))
///@param[in] clusters ground truth of the clusters
///@return precision fraction
double calcul::score_precision(int ref, vtr<int> const &queryAnswer, input_groundTruth const &clusters)
{
	int clusterSize = clusters.getClusterSize(ref);

	int truePositives = 0;
	int falsePositives = 0;
	//int range = min(topThreshold, (int)top.size());
	for (int i = 0; i < clusterSize - 1; i++)
	{
		if (clusters.getClusterID(queryAnswer[i]) == clusters.getClusterID(ref))
			truePositives++;
		else
			falsePositives++;
	}

	double precision = (double)truePositives / (truePositives + falsePositives);
	return precision;
}

///Calculates recall.
///@param[in] ref cluster id reference
///@param[in] queryAnswer query answer row (from id matrix (int))
///@param[in] clusters ground truth of the clusters
///@return recall fraction
double calcul::score_recall(int ref, vtr<int> const &queryAnswer, input_groundTruth const &clusters)
{	
	int clusterSize = clusters.getClusterSize(ref);

	int truePositives = 0;
	for (int i = 0; i < clusterSize - 1; i++)
	{
		if (clusters.getClusterID(queryAnswer[i]) == clusters.getClusterID(ref))
			truePositives++;
	}

	double recall = (double)truePositives / (clusterSize - 1);
	return recall;
}

//
//double GetMultiScorelcss(vtr<string, 3> input, vector<int> raws)
//{
//    size_t sumA = 0;
//    for (auto s: input)
//        sumA += s.size();
//
//    int sumB = 0;
//    for (auto s: raws)
//        sumB += s;
//
//    return sumA / (double)sumB;
//}

///Calculates Keogh's lower bound for dtw. EXPERIMENT
///@param[in] A time series A
///@param[in] B time series B
///@param[in] params parameters
///@return Keogh's lower bound distance
double calcul::lb_keogh(vtr2<double> const &A, vtr2<double> const &B, parameter const &params)
{
	int width = 1 + 2 * (int)params.w;
	int w = (int)params.w;
	deque<int> maxfifo, minfifo;
	maxfifo.push_back(0);
	minfifo.push_back(0);

	vtr<double> maxvalues(B.size());
	vtr<double> minvalues(B.size());

	for (int i = 1; i < static_cast<int>(A.size()); ++i) {
		if (i >= w + 1) {
			maxvalues[i - w - 1] = A[maxfifo.front()][0];
			minvalues[i - w - 1] = A[minfifo.front()][0];
		}
		if (A[i][0] > A[i - 1][0]) { //overshoot
			maxfifo.pop_back();
			while (maxfifo.size() > 0) {
				if (A[i][0] <= A[maxfifo.back()][0]) break;
				maxfifo.pop_back();
			}
		}
		else {
			minfifo.pop_back();
			while (minfifo.size() > 0) {
				if (A[i][0] >= A[minfifo.back()][0]) break;
				minfifo.pop_back();
			}
		}
		maxfifo.push_back(static_cast<int>(i));
		minfifo.push_back(static_cast<int>(i));
		if (i == width + maxfifo.front()) maxfifo.pop_front();
		else if (i == width + minfifo.front()) minfifo.pop_front();
	}

	for (size_t i = A.size(); i <= A.size() + w; ++i) {
		maxvalues[i - w - 1] = A[maxfifo.front()][0];
		minvalues[i - w - 1] = A[minfifo.front()][0];
		if (static_cast<int>(i) - maxfifo.front() >= width) maxfifo.pop_front();
		if (static_cast<int>(i) - minfifo.front() >= width) minfifo.pop_front();
	}

	double result = 0;
	for (size_t i = 0; i < A.size(); i++)
	{
		if (B[i][0] > maxvalues[i])
			result += pow(B[i][0] - maxvalues[i], 2);
		else if (B[i][0] < minvalues[i])
			result += pow(B[i][0] - minvalues[i], 2);
	}

	return result;
}

///Calculates Cosine distance.
///@param[in] u element of the compared time series
///@param[in] v element of the compared time series
///@return cosine distance
double calcul::distance_cosine(vtr<double> const &u, vtr<double> const &v)
{
	double sumAB = 0;
	for (size_t i = 0; i < u.size(); i++)
		sumAB += u[i] * v[i];

	double sumA = 0;
	for (size_t i = 0; i < u.size(); i++)
		sumA += pow(u[i], 2);

	double sumB = 0;
	for (size_t i = 0; i < u.size(); i++)
		sumB += pow(v[i], 2);

	return sumAB / (sqrt(sumA) * sqrt(sumB));
}

///Calculates first cell included in the warping window.
///@param[in] row row
///@param[in] win warping window width
///@param[in] coefficient input time series length ratio
///@return index of first cell in warping window
int calcul::dtw_wpassStart(size_t row, int win, double coefficient)
{
	return std::max(1, (int)(ceil((row - 1) * coefficient + 0.0000000001) - win));
}

///Calculates last cell included in the warping window.
///@param[in] row row
///@param[in] win warping window width
///@param[in] coefficient input time series length ratio
///@param[in] lenB length of the time series B
///@return index of last cell in warping window
int calcul::dtw_wpassEnd(size_t row, int win, double coefficient, int lenB)
{
	return std::min(lenB + 1, (int)(ceil(row * coefficient) + 1) + win);
	//const size_t end = min(lenB + 1, (int)(ceil(i * lenB / (double)lenA) + 1) + w);
}

///Calculates if cell is included in the flexible warping pass.
///@param[in] row row
///@param[in] col column
///@param[in] past last cell which was included
///@param[in] w flexible warping pass width
///@return TRUE if cell is in the flexible pass
///@return FALSE if cell is not in the flexible pass
bool calcul::dtw_isFlexiblePass(int row, int col, coord const &past, int w)
{
	//return std::abs((row - past.row) - (col - past.col)) < w;
	return std::abs((past.row - row) - (past.col - col)) < w;
}

///Calculates mean value of the input vector.
///@param[in] v vector of doubles
///@return mean
double calcul::vtr_mean(vtr<double> const &v)
{
	double sum = 0;
	for (auto &i : v)
		sum += i;

	return sum / v.size();
}

///Calculates vector of means of the 2d input vector.
///@param[in] series two dimensional vector
///@return vector of means
vtr<double> calcul::vtr_mean(vtr2<double> const &series)
{
	vtr<double> average(series.size());

	for (size_t i = 0; i < series.size(); i++)
		average[i] = vtr_mean(series[i]);

	/*vtr<double> average(serie.size());
	for (size_t i = 0; i < serie.size(); i++) {
		double avg = 0;
		for (size_t j = 0; j < series[0].size(); j++) {
			avg += series[i][j];
		}
		average[i] = avg / series[0].size();
	}*/

	return average;
}

///Calculates standard deviation of the input vector.
///@param[in] v vector
///@return standard deviation
double calcul::vtr_std(vtr<double> const &v)
{
	double mean = vtr_mean(v);

	double sum = 0;
	for (auto &i : v)
		sum += pow(i - mean, 2);

	return sqrt(sum / v.size());
}

///Finds maximal value contained in the input vector.
///@tparam data type of searched vector
///@param[in] input vector
///@return maximal value
template <typename T>
T calcul::vtr_max(vtr<T> const &input)
{
	T max = (T)constant::MIN_double;

	for (auto &i : input)
		if (i > max)
			max = i;

	return max;
}
template int calcul::vtr_max(vtr<int> const &input);
template double calcul::vtr_max<double>(vtr<double> const &input);

///Finds maximal value contained in the 2d input vector.
///@tparam data type of searched vector
///@param[in] input vector
///@return maximal value
template <typename T>
T calcul::vtr_max(vtr2<T> const &input)
{
	double max = constant::MIN_double;

	for (auto &&i : input)
		for (auto &&j : i)
			if (j > max)
				max = j;

	return max;
}
template double calcul::vtr_max<double>(vtr2<double> const &input);

///Finds maximal value contained in the 3d input vector.
///@param[in] input vector
///@return maximal value
template <typename T>
T calcul::vtr_max(vtr3<T> const &input)
{
	double max = constant::MIN_double;

	for (auto const &i : input) {
		auto tmp = vtr_findMax(i);

		if (tmp > max)
			max = tmp;
	}

	return max;
}

///Finds minimal value contained in the input vector.
///@param[in] input vector
///@return minimal value
template <typename T>
T calcul::vtr_min(vtr<T> const &input)
{
	T min = (T)constant::MAX_double;

	for (auto &i : input)
		if (i < min)
			min = i;

	return min;
}
template double calcul::vtr_min<double>(vtr<double> const &input);

///Finds minimal value contained in the input vector.
///@param[in] input vector
///@return minimal value
template <typename T>
T calcul::vtr_min(vtr2<T> const &input)
{
	double min = constant::MAX_double;

	for (auto const &i : input)
		for (auto const &j : i)
			if (j < min)
				min = j;

	return min;
}
template double calcul::vtr_min<double>(vtr2<double> const &input);