Commit 95b10b0b authored by Ivo Peterek

ENH: new version of clustering. #8

parent 1563b449
......@@ -6,8 +6,10 @@ from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.backends.backend_qt5agg import NavigationToolbar2QT as NavigationToolbar
from matplotlib.figure import Figure
import pwlf
import matplotlib.pyplot as plt
from scipy import signal
from sklearn.cluster import DBSCAN
import numpy as np
import matplotlib.pylab as pl
import copy as cp
......@@ -24,6 +26,13 @@ warnings.filterwarnings('ignore', category=matplotlib.cbook.mplDeprecation)
pp = pprint.PrettyPrinter(indent=4)
def metric(x, y, c1, c2):
    """Return the weighted squared distance between two 2-D points.

    Args:
        x: First point; only ``x[0]`` and ``x[1]`` are read.
        y: Second point; only ``y[0]`` and ``y[1]`` are read.
        c1: Weight applied to the squared difference of the first coordinates.
        c2: Weight applied to the squared difference of the second coordinates.

    Returns:
        ``c1 * (x[0] - y[0]) ** 2 + c2 * (x[1] - y[1]) ** 2``. The result is a
        squared quantity; no square root is taken.
    """
    delta_first = x[0] - y[0]
    delta_second = x[1] - y[1]
    return c1 * delta_first ** 2 + c2 * delta_second ** 2
def similarity(x, y):
    """Return the distance between two samples with fixed axis weights.

    Delegates to ``metric`` with a weight of 0.001 on the first coordinate
    and 1 on the second, so differences in the second coordinate dominate.
    The two-argument signature lets it be passed as a callable ``metric=``
    (it is handed to ``DBSCAN`` in ``Window.clustering``).

    Args:
        x: First sample; indexed at 0 and 1.
        y: Second sample; indexed at 0 and 1.

    Returns:
        ``metric(x, y, 0.001, 1)``.
    """
    return metric(x, y, 0.001, 1)
class Window(QtWidgets.QDialog):
sendInfo = QtCore.pyqtSignal(object)
......@@ -372,125 +381,41 @@ class Window(QtWidgets.QDialog):
def clustering(self):
# Purpose: split the first series in self.samples_data into clusters, replace
# self.samples_data / self.samples_data_lables with the result, and redraw.
# NOTE(review): this span comes from a rendered diff. Lines removed by the
# commit (the extrema-based splitting) and lines added by it (the DBSCAN
# path) appear interleaved without +/- markers, and indentation is lost.
# Confirm which statements are live against the repository before editing.
self.clearCanvas()
self.addToPlot()
# original_data is only referenced by the commented-out lines near the end.
original_data = self.samples_data[0]
samples = self.samples_data[0]
# Each sample is indexed as a pair: element 0 goes to nums, element 1 to engs
# (presumably sample number and energy -- TODO confirm against the loader).
nums = [e[0] for e in samples]
engs = [e[1] for e in samples]
avg = np.mean(engs)
var = np.var(engs)
# Debug output; the two prints look like the old and new form of one line.
print(avg, var)
print(avg, var, np.sqrt(var))
# lookahead_step = int(0.25 * len(samples))
# lookbehind_step = int(0.25 * len(samples))
# Passed to DBSCAN as min_samples below; lowered from 100 to 10 when the
# combo box text contains 'Voltage regulator'.
min_region_size = 100
if 'Voltage regulator' in self.combo_sample.currentText():
min_region_size = 10
# State for the sequential (extrema/discontinuity) splitting loop below.
clusters = []
self.samples_data = []
current_cluster = None
prev_sample = None
new_cluster = True
num_extrema = 0
# Trend of next samples' energy
increasing = True
# Smooth samples
# Two Savitzky-Golay passes over engs: first with a window of ~10% of the
# sample count, then with a window of the full sample count. Both sizes go
# through self.round_down_to_odd (helper not shown here).
smooth_window_size = self.round_down_to_odd(int(len(samples)))
smooth_window_size_small = self.round_down_to_odd(int(0.1*len(samples)))
poly_order = 3
smooth_samples = signal.savgol_filter(engs, smooth_window_size_small, poly_order)
smooth_samples = signal.savgol_filter(smooth_samples, smooth_window_size, poly_order)
# for i in range(1):
# smooth_samples = signal.savgol_filter(smooth_samples, smooth_window_size, poly_order)
# Re-pair each smoothed value with its original first coordinate.
smooth_samples = [(nums[i], e) for i, e in enumerate(smooth_samples)]
# Derivative of smooth samples function
# Note: the derivative is computed from the raw engs (deriv=1), not from the
# already-smoothed series, and is paired with the positional index.
smooth_samples_d = signal.savgol_filter(engs, smooth_window_size, poly_order, deriv=1)
smooth_samples_d = [(i, e) for i, e in enumerate(smooth_samples_d)]
for iter_ind, sample in enumerate(samples):
ind = sample[0]
eng = sample[1]
# lookahead_set = None
# if iter_ind == len(samples)-1:
# lookahead_set = [smooth_samples[iter_ind]]
# elif len(smooth_samples[iter_ind+1:]) < lookahead_step:
# lookahead_set = smooth_samples[iter_ind + 1:]
# else:
# lookahead_set = smooth_samples[iter_ind+1:iter_ind+lookahead_step]
#
# lookbehind_set = None
# if iter_ind == 0:
# lookbehind_set = [smooth_samples[iter_ind]]
# elif len(smooth_samples[:iter_ind]) < lookbehind_step:
# lookbehind_set = smooth_samples[:iter_ind]
# else:
# lookbehind_set = smooth_samples[iter_ind-lookbehind_step:iter_ind]
# Is sample an extrema?
# Interior samples only: flagged when |derivative| <= 0.005 and the smoothed
# value is the max or min of itself and its two immediate neighbours.
is_extrema = False
if iter_ind != 0 and iter_ind != len(samples) - 1:
derivative = smooth_samples_d[iter_ind]
radius = [smooth_samples[iter_ind][1], smooth_samples[iter_ind+1][1], smooth_samples[iter_ind-1][1]]
# if np.abs(derivative[1]) <= 0.001:
# print(iter_ind,radius)
if np.abs(derivative[1]) <= 0.005 and (smooth_samples[iter_ind][1] == max(radius) or smooth_samples[iter_ind][1] == min(radius)):
is_extrema = True
num_extrema += 1
# New cluster for the first sample & discontinuity in energy
# A jump of more than 48 between consecutive second coordinates also starts a
# new cluster. An extremum sample itself is not appended; it only arms
# new_cluster for the next iteration.
if new_cluster or np.abs(prev_sample[1]-eng) > 48:
current_cluster = list()
clusters.append(current_cluster)
current_cluster.append(sample)
new_cluster = False
elif is_extrema:
new_cluster = True
else:
current_cluster.append(sample)
prev_sample = sample
print(num_extrema)
# print('linear approx: start')
# pwlf_fit = pwlf.PiecewiseLinFit(nums, engs)
# x_fit = pwlf_fit.fit(2*num_extrema+1)
# y_fit = pwlf_fit.predict(x_fit)
#
# print(x_fit)
# print(y_fit)
# fit = list(zip(x_fit, y_fit))
#
# print('linear approx: done')
#
# print('linear approx: start')
# pwlf_fit = pwlf.PiecewiseLinFit(nums, engs)
# x_fit = pwlf_fit.fit(2 * num_extrema + 3)
# y_fit = pwlf_fit.predict(x_fit)
#
# print(x_fit)
# print(y_fit)
# fit2 = list(zip(x_fit, y_fit))
#
# print('linear approx: done')
# DBSCAN path: density clustering of the raw samples using the module-level
# `similarity` callable as the distance, eps = 0.9 * std-dev of engs.
# The local name `clustering` shadows this method's name inside the body.
clustering = DBSCAN(eps=0.9*np.sqrt(var), min_samples=min_region_size, metric=similarity).fit(samples)
# Label -1 is treated as "noise" (scikit-learn's DBSCAN convention -- see the
# library documentation).
noisy_samples = False
unique_labels = set(clustering.labels_)
if -1 in unique_labels:
noisy_samples = True
# One bucket per distinct label. A label of -1 indexes the last bucket via
# negative indexing, which is why the labelling loop below names clusters[-1]
# 'Noisy samples' when noise is present.
clusters = [[] for e in range(len(unique_labels))]
for i, label in enumerate(clustering.labels_):
clusters[label].append(samples[i])
# Replace the plotted series and their legend labels with the clusters.
self.samples_data = []
self.samples_data_lables = []
self.samples_data.extend(clusters)
for num, cluster in enumerate(clusters):
# NOTE(review): the unconditional append below and the if/else further down
# both add a label per cluster; presumably one is the removed diff side and
# the other the added side -- confirm, since running both would double the
# labels.
self.samples_data_lables.append('Cluster ' + str(num))
# self.samples_data.append(fit)
# self.samples_data_lables.append('linear approx.')
# self.samples_data.append(fit2)
# self.samples_data_lables.append('2nd linear approx.')
self.samples_data.append(smooth_samples)
self.samples_data_lables.append('Sav-Gol approx.')
if cluster is clusters[-1] and noisy_samples:
self.samples_data_lables.append('Noisy samples')
else:
self.samples_data_lables.append('Cluster ' + str(num))
# self.samples_data.append(original_data)
# self.samples_data_lables.append('Original data')
self.plot()
def load_samples_from_csv(self, csv_path):
new_samples_data = []
added_zeros_count = 0
......@@ -613,12 +538,18 @@ class Window(QtWidgets.QDialog):
if self.plotType:
self.ax.scatter(Y, X, label=self.k[m], color=colors[m], s=self.dotSizeSpinBox.value())
else:
self.ax.plot(Y, X, label=self.k[m], color=colors[m])
if self.k[m] == 'Noisy samples':
self.ax.scatter(Y, X, label=self.k[m], color=colors[m], s=self.dotSizeSpinBox.value())
else:
self.ax.plot(Y, X, label=self.k[m], color=colors[m])
else:
if self.plotType:
self.ax.scatter(X, Y, label=self.k[m], color=colors[m], s=self.dotSizeSpinBox.value())
else:
self.ax.plot(X, Y, label=self.k[m], color=colors[m])
if self.k[m] == 'Noisy samples':
self.ax.scatter(X, Y, label=self.k[m], color=colors[m], s=self.dotSizeSpinBox.value())
else:
self.ax.plot(X, Y, label=self.k[m], color=colors[m])
# plot structure
self.xlab = "Sample number"
......@@ -648,7 +579,7 @@ class Window(QtWidgets.QDialog):
handles, labels = self.ax.get_legend_handles_labels()
lgd = self.ax.legend(handles,labels,loc=2,bbox_to_anchor=(1,1.025),ncol=1, borderaxespad = 0.,prop={'size': 13})
lgd.set_title("Clusters")
#lgd.set_title("Clusters")
for i in range(0,len(self.K)):
for j in range(0,len(self.K[i])):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment