Source code for pyclustertend.visual_assessment_of_tendency

import numpy as np
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt


[docs]def vat(data, return_odm=False, figure_size=(10, 10)): """VAT means Visual assesement of tendency. basically, it allow to asses cluster tendency through a map based on the dissimiliraty matrix. Parameters ---------- data : matrix numpy array return_odm : return the Ordered Dissimalirity Matrix boolean (default to False) figure_size : size of the VAT. tuple (default to (10,10)) Return ------- ODM : matrix the ordered dissimalarity matrix plotted. """ ordered_dissimilarity_matrix = compute_ordered_dissimilarity_matrix(data) _, ax = plt.subplots(figsize=figure_size) ax.imshow(ordered_dissimilarity_matrix, cmap='gray', vmin=0, vmax=np.max(ordered_dissimilarity_matrix)) if return_odm is True: return ordered_dissimilarity_matrix
[docs]def compute_ordered_dissimilarity_matrix(X): """The ordered dissimilarity matrix is used by visual assesement of tendency. It is a just a a reordering of the dissimilarity matrix. Parameters ---------- X : matrix numpy array Return ------- ODM : matrix the ordered dissimalarity matrix . """ # Step 1 : observation_path = [] matrix_of_pairwise_distance = pairwise_distances(X) list_of_int = np.zeros(matrix_of_pairwise_distance.shape[0], dtype="int") index_of_maximum_value = np.argmax(matrix_of_pairwise_distance) column_index_of_maximum_value = index_of_maximum_value // matrix_of_pairwise_distance.shape[1] list_of_int[0] = column_index_of_maximum_value observation_path.append(column_index_of_maximum_value) K = np.linspace(0, matrix_of_pairwise_distance.shape[0] - 1, matrix_of_pairwise_distance.shape[0], dtype="int") J = np.delete(K, column_index_of_maximum_value) # Step 2 : for r in range(1, matrix_of_pairwise_distance.shape[0]): p, q = (-1, -1) mini = np.max(matrix_of_pairwise_distance) for candidate_p in observation_path: for candidate_j in J: if matrix_of_pairwise_distance[candidate_p, candidate_j] < mini: p = candidate_p q = candidate_j mini = matrix_of_pairwise_distance[p, q] list_of_int[r] = q observation_path.append(q) ind_q = np.where(np.array(J) == q)[0][0] J = np.delete(J, ind_q) # Step 3 ordered_matrix = np.zeros(matrix_of_pairwise_distance.shape) for column_index_of_maximum_value in range(ordered_matrix.shape[0]): for j in range(ordered_matrix.shape[1]): ordered_matrix[column_index_of_maximum_value, j] = matrix_of_pairwise_distance[ list_of_int[column_index_of_maximum_value], list_of_int[j]] # Step 4 : return ordered_matrix
[docs]def ivat(data, return_odm=False, figure_size=(10, 10)): """iVat return a visualisation based on the Vat but more reliable and easier to interpret. Parameters ---------- data : matrix numpy array return_odm : return the Ordered Dissimalirity Matrix boolean (default to False) figure_size : size of the VAT. tuple (default to (10,10)) Return ------- D_prim : matrix the ivat ordered dissimalarity matrix. """ ordered_matrix = compute_ivat_ordered_dissimilarity_matrix(data) _, ax = plt.subplots(figsize=figure_size) ax.imshow(ordered_matrix, cmap='gray', vmin=0, vmax=np.max(ordered_matrix)) if return_odm is True: return ordered_matrix
[docs]def compute_ivat_ordered_dissimilarity_matrix(X): """The ordered dissimilarity matrix is used by ivat. It is a just a a reordering of the dissimilarity matrix. Parameters ---------- X : matrix numpy array Return ------- D_prim : matrix the ordered dissimalarity matrix . """ ordered_matrix = compute_ordered_dissimilarity_matrix(X) re_ordered_matrix = np.zeros((ordered_matrix.shape[0], ordered_matrix.shape[0])) for r in range(1, ordered_matrix.shape[0]): # Step 1 : find j for which D[r,j] is minimum and j in [1:r-1] j = np.argmin(ordered_matrix[r, 0:r]) # Step 2 : re_ordered_matrix[r, j] = ordered_matrix[r, j] # Step 3 : pour c : 1,r-1 avec c !=j c_tab = np.array(range(0, r)) c_tab = c_tab[c_tab != j] for c in c_tab: re_ordered_matrix[r, c] = max(ordered_matrix[r, j], re_ordered_matrix[j, c]) re_ordered_matrix[c, r] = re_ordered_matrix[r, c] return re_ordered_matrix