import numpy as np
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt
[docs]def vat(data, return_odm=False, figure_size=(10, 10)):
"""VAT means Visual assesement of tendency. basically, it allow to asses cluster tendency
through a map based on the dissimiliraty matrix.
Parameters
----------
data : matrix
numpy array
return_odm : return the Ordered Dissimalirity Matrix
boolean (default to False)
figure_size : size of the VAT.
tuple (default to (10,10))
Return
-------
ODM : matrix
the ordered dissimalarity matrix plotted.
"""
ordered_dissimilarity_matrix = compute_ordered_dissimilarity_matrix(data)
_, ax = plt.subplots(figsize=figure_size)
ax.imshow(ordered_dissimilarity_matrix, cmap='gray', vmin=0, vmax=np.max(ordered_dissimilarity_matrix))
if return_odm is True:
return ordered_dissimilarity_matrix
[docs]def compute_ordered_dissimilarity_matrix(X):
"""The ordered dissimilarity matrix is used by visual assesement of tendency. It is a just a a reordering
of the dissimilarity matrix.
Parameters
----------
X : matrix
numpy array
Return
-------
ODM : matrix
the ordered dissimalarity matrix .
"""
# Step 1 :
observation_path = []
matrix_of_pairwise_distance = pairwise_distances(X)
list_of_int = np.zeros(matrix_of_pairwise_distance.shape[0], dtype="int")
index_of_maximum_value = np.argmax(matrix_of_pairwise_distance)
column_index_of_maximum_value = index_of_maximum_value // matrix_of_pairwise_distance.shape[1]
list_of_int[0] = column_index_of_maximum_value
observation_path.append(column_index_of_maximum_value)
K = np.linspace(0, matrix_of_pairwise_distance.shape[0] - 1, matrix_of_pairwise_distance.shape[0], dtype="int")
J = np.delete(K, column_index_of_maximum_value)
# Step 2 :
for r in range(1, matrix_of_pairwise_distance.shape[0]):
p, q = (-1, -1)
mini = np.max(matrix_of_pairwise_distance)
for candidate_p in observation_path:
for candidate_j in J:
if matrix_of_pairwise_distance[candidate_p, candidate_j] < mini:
p = candidate_p
q = candidate_j
mini = matrix_of_pairwise_distance[p, q]
list_of_int[r] = q
observation_path.append(q)
ind_q = np.where(np.array(J) == q)[0][0]
J = np.delete(J, ind_q)
# Step 3
ordered_matrix = np.zeros(matrix_of_pairwise_distance.shape)
for column_index_of_maximum_value in range(ordered_matrix.shape[0]):
for j in range(ordered_matrix.shape[1]):
ordered_matrix[column_index_of_maximum_value, j] = matrix_of_pairwise_distance[
list_of_int[column_index_of_maximum_value], list_of_int[j]]
# Step 4 :
return ordered_matrix
[docs]def ivat(data, return_odm=False, figure_size=(10, 10)):
"""iVat return a visualisation based on the Vat but more reliable and easier to
interpret.
Parameters
----------
data : matrix
numpy array
return_odm : return the Ordered Dissimalirity Matrix
boolean (default to False)
figure_size : size of the VAT.
tuple (default to (10,10))
Return
-------
D_prim : matrix
the ivat ordered dissimalarity matrix.
"""
ordered_matrix = compute_ivat_ordered_dissimilarity_matrix(data)
_, ax = plt.subplots(figsize=figure_size)
ax.imshow(ordered_matrix, cmap='gray', vmin=0, vmax=np.max(ordered_matrix))
if return_odm is True:
return ordered_matrix
[docs]def compute_ivat_ordered_dissimilarity_matrix(X):
"""The ordered dissimilarity matrix is used by ivat. It is a just a a reordering
of the dissimilarity matrix.
Parameters
----------
X : matrix
numpy array
Return
-------
D_prim : matrix
the ordered dissimalarity matrix .
"""
ordered_matrix = compute_ordered_dissimilarity_matrix(X)
re_ordered_matrix = np.zeros((ordered_matrix.shape[0], ordered_matrix.shape[0]))
for r in range(1, ordered_matrix.shape[0]):
# Step 1 : find j for which D[r,j] is minimum and j in [1:r-1]
j = np.argmin(ordered_matrix[r, 0:r])
# Step 2 :
re_ordered_matrix[r, j] = ordered_matrix[r, j]
# Step 3 : pour c : 1,r-1 avec c !=j
c_tab = np.array(range(0, r))
c_tab = c_tab[c_tab != j]
for c in c_tab:
re_ordered_matrix[r, c] = max(ordered_matrix[r, j], re_ordered_matrix[j, c])
re_ordered_matrix[c, r] = re_ordered_matrix[r, c]
return re_ordered_matrix