UML Hc

2 min read
#####################For 1-Dimensional Data###############################
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
# Five one-dimensional observations, reshaped into a single column
# before being handed to the linkage routine.
data = np.array([7, 10, 20, 28, 35]).reshape(-1, 1)
# Leaf captions for the dendrograms, one per observation in the same order.
labels = ['7', '10', '20', '28', '35']
# Linkage criteria that are compared side by side.
methods = ['single', 'complete', 'average']
# Linkage matrix per method; reused further down to print the merge tables.
dendrograms = {}

plt.figure(figsize=(15, 10))
for panel, method in enumerate(methods, start=1):
    # Build the merge hierarchy for this criterion and remember it.
    merges = sch.linkage(data, method=method)
    dendrograms[method] = merges
    # One panel per method on a 2x2 grid (a 1x3 grid would work as well).
    plt.subplot(2, 2, panel)
    sch.dendrogram(merges, labels=labels)
    plt.title(f'Dendrogram ({method.capitalize()} Linkage)')
    plt.xlabel('Data Points')
    plt.ylabel('Distance')
# Lay out and display the figure once every panel has been drawn.
plt.tight_layout()
plt.show()
# Function to plot scatter diagrams with clusters and legends
def plot_clusters_with_legend(data, labels, title):
    """Draw 1-D points on a horizontal line, coloured by cluster.

    Each cluster gets its own scatter call with an explicit viridis colour,
    so the legend swatch always matches the points it describes. (Legend
    entries created from empty scatter calls would instead take colours
    from the default property cycle.)

    Args:
        data: 1-D array-like of point positions along the x-axis.
        labels: Array-like of cluster labels, one per entry of ``data``.
        title: Title shown above the plot.
    """
    data = np.asarray(data).ravel()
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    if unique_labels.size:
        lowest = unique_labels.min()
        # Guard the span so a single cluster maps to 0.0 rather than 0/0.
        span = max(unique_labels.max() - lowest, 1)
    else:
        lowest, span = 0, 1
    cmap = plt.cm.viridis
    for cluster in unique_labels:
        members = labels == cluster
        # Min-max position on the colormap, lowest label -> 0, highest -> 1.
        color = cmap((cluster - lowest) / span)
        plt.scatter(
            data[members],
            np.zeros(members.sum()),  # all points sit on the y = 0 line
            c=[color],
            label=f"Cluster {cluster+1}",
            s=100,
        )
    plt.title(title)
    plt.xlabel("Data Points")
    # The y-axis carries no information for 1-D data, so hide its ticks.
    plt.yticks([])
    plt.legend(loc='upper right')
    plt.show()
# Cut the data into three flat clusters with each linkage method and plot them.
for method in methods:
    model = AgglomerativeClustering(n_clusters=3, linkage=method, metric='euclidean')
    cluster_labels = model.fit_predict(data)
    heading = f"Clusters ({method.capitalize()} Linkage)"
    # Hand the positions over as a flat 1-D array.
    plot_clusters_with_legend(data.flatten(), cluster_labels, heading)

# Print every stored linkage matrix as a table, one merge step per row.
# Per the SciPy linkage format, the columns are: the two clusters merged,
# the distance between them, and the number of points in the new cluster.
merge_columns = ['Cluster 1', 'Cluster 2', 'Distance', 'Points']
for method, merges in dendrograms.items():
    intermediate_clusters = pd.DataFrame(merges, columns=merge_columns)
    print(f"\nIntermediate Cluster Formations ({method.capitalize()} Linkage):")
    print(intermediate_clusters)
#######################For 2-Dimensional Data################
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
# Five two-dimensional observations, one (x, y) pair per row.
data = np.array([[1, 2], [2, 3], [3, 4], [5, 8], [6, 9]])
# Leaf captions for the dendrograms, one per row of `data` in the same order.
labels = ['A', 'B', 'C', 'D', 'E']
# Linkage criteria that are compared side by side.
methods = ['single', 'complete', 'average']
# Linkage matrix per method; reused further down to print the merge tables.
dendrograms = {}

plt.figure(figsize=(15, 10))
for panel, method in enumerate(methods, start=1):
    # Build the merge hierarchy for this criterion and remember it.
    merges = sch.linkage(data, method=method)
    dendrograms[method] = merges
    # One panel per method on a 2x2 grid (a 1x3 grid would work as well).
    plt.subplot(2, 2, panel)
    sch.dendrogram(merges, labels=labels)
    plt.title(f'Dendrogram ({method.capitalize()} Linkage)')
    plt.xlabel('Data Points')
    plt.ylabel('Distance')
# Lay out and display the figure once every panel has been drawn.
plt.tight_layout()
plt.show()
# Function to plot scatter diagrams with clusters and legends
def plot_clusters_with_legend(data, labels, title):
    """Draw 2-D points as a scatter plot, coloured by cluster.

    Each cluster gets its own scatter call with an explicit viridis colour,
    so the legend swatch matches the points it describes.

    Args:
        data: 2-D array-like; column 0 is plotted as x and column 1 as y.
        labels: Array-like of cluster labels, one per row of ``data``.
        title: Title shown above the plot.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    if unique_labels.size:
        lowest = unique_labels.min()
        # Guard the span: dividing by the largest label alone gives 0/0
        # (a NaN colour) when the only cluster present is labelled 0.
        span = max(unique_labels.max() - lowest, 1)
    else:
        lowest, span = 0, 1
    cmap = plt.cm.viridis
    # Plot each cluster separately so points and legend share one colour.
    for cluster in unique_labels:
        members = labels == cluster
        # Min-max position on the colormap, lowest label -> 0, highest -> 1.
        color = cmap((cluster - lowest) / span)
        plt.scatter(
            data[members, 0],
            data[members, 1],
            c=[color],
            label=f"Cluster {cluster+1}",
            s=100,
        )
    plt.title(title)
    plt.xlabel("X-coordinate")
    plt.ylabel("Y-coordinate")
    plt.legend(loc='upper right')
    plt.show()
# Cut the data into three flat clusters with each linkage method and plot them.
for method in methods:
    model = AgglomerativeClustering(n_clusters=3, linkage=method, metric='euclidean')
    cluster_labels = model.fit_predict(data)
    heading = f"Clusters ({method.capitalize()} Linkage)"
    plot_clusters_with_legend(data, cluster_labels, heading)

# Print every stored linkage matrix as a table, one merge step per row.
# Per the SciPy linkage format, the columns are: the two clusters merged,
# the distance between them, and the number of points in the new cluster.
merge_columns = ['Cluster 1', 'Cluster 2', 'Distance', 'Points']
for method, merges in dendrograms.items():
    intermediate_clusters = pd.DataFrame(merges, columns=merge_columns)
    print(f"\nIntermediate Cluster Formations ({method.capitalize()} Linkage):")
    print(intermediate_clusters)
0
Subscribe to my newsletter
Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.
Written by
