UML Hc

Invoker
2 min read
#####################For 1-Dimensional Data###############################
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering

# Five one-dimensional observations, reshaped into a single-feature column
data = np.array([7, 10, 20, 28, 35]).reshape(-1, 1)

# Leaf labels shown under the dendrograms, one per observation
labels = ['7', '10', '20', '28', '35']

# Linkage criteria to compare; the linkage matrix of each is kept in
# `dendrograms`, keyed by method name
methods = ['single', 'complete', 'average']
dendrograms = {method: sch.linkage(data, method=method) for method in methods}

plt.figure(figsize=(15, 10))

# One dendrogram per linkage method, laid out on a 2x2 grid
# (a 1x3 grid would work as well)
for panel, (method, linkage_matrix) in enumerate(dendrograms.items(), start=1):
    axes = plt.subplot(2, 2, panel)
    sch.dendrogram(linkage_matrix, labels=labels)
    axes.set_title(f'Dendrogram ({method.capitalize()} Linkage)')
    axes.set_xlabel('Data Points')
    axes.set_ylabel('Distance')

plt.tight_layout()
plt.show()

# Function to plot scatter diagrams with clusters and legends
def plot_clusters_with_legend(data, labels, title):
    """Plot 1-D points on a horizontal line, coloured by cluster, and show it.

    Each cluster is drawn with its own viridis colour and its own legend
    entry, so the legend swatches match the colours of the plotted points.

    Args:
        data: array-like of point coordinates, plotted along the x-axis.
        labels: array-like of integer cluster labels, one per point.
        title: text used as the plot title.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    cmap = plt.cm.viridis

    if unique_labels.size:
        # Spread the labels linearly over the colormap range [0, 1]; when
        # there is only one label the spread is 0, so divide by 1 instead.
        lowest = unique_labels.min()
        spread = (unique_labels.max() - lowest) or 1
        for cluster in unique_labels:
            points = data[labels == cluster]
            plt.scatter(
                points,
                np.zeros_like(points),  # all points sit on the line y = 0
                c=[cmap((cluster - lowest) / spread)],
                label=f"Cluster {cluster + 1}",
                s=100,
            )

    plt.title(title)
    plt.xlabel("Data Points")
    plt.yticks([])  # the y-axis carries no information for 1-D data
    plt.legend(loc='upper right')
    plt.show()

# Cut the hierarchy into three clusters with each linkage method and draw
# the resulting assignment
for method in methods:
    model = AgglomerativeClustering(n_clusters=3, linkage=method, metric='euclidean')
    plot_clusters_with_legend(
        data.flatten(),
        model.fit_predict(data),
        f"Clusters ({method.capitalize()} Linkage)",
    )

# Print each stored linkage matrix as a table, one row per merge step
merge_columns = ['Cluster 1', 'Cluster 2', 'Distance', 'Points']
for method, linkage_matrix in dendrograms.items():
    print(f"\nIntermediate Cluster Formations ({method.capitalize()} Linkage):")
    print(pd.DataFrame(linkage_matrix, columns=merge_columns))

#######################For 2-Dimensional Data################
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering

# Five two-dimensional observations, one (x, y) pair per row
data = np.array([[1, 2], [2, 3], [3, 4], [5, 8], [6, 9]])

# Leaf labels shown under the dendrograms, one per observation
labels = ['A', 'B', 'C', 'D', 'E']

# Linkage criteria to compare; the linkage matrix of each is kept in
# `dendrograms`, keyed by method name
methods = ['single', 'complete', 'average']
dendrograms = {method: sch.linkage(data, method=method) for method in methods}

plt.figure(figsize=(15, 10))

# One dendrogram per linkage method, laid out on a 2x2 grid
# (a 1x3 grid would work as well)
for panel, (method, linkage_matrix) in enumerate(dendrograms.items(), start=1):
    axes = plt.subplot(2, 2, panel)
    sch.dendrogram(linkage_matrix, labels=labels)
    axes.set_title(f'Dendrogram ({method.capitalize()} Linkage)')
    axes.set_xlabel('Data Points')
    axes.set_ylabel('Distance')

plt.tight_layout()
plt.show()

# Function to plot scatter diagrams with clusters and legends
def plot_clusters_with_legend(data, labels, title):
    """Scatter 2-D points coloured by cluster and show the plot with a legend.

    Args:
        data: 2-D array-like; column 0 is plotted on the x-axis and column 1
            on the y-axis.
        labels: array-like of integer cluster labels, one per row of ``data``.
        title: text used as the plot title.
    """
    # Coerce to arrays so boolean-mask indexing also works for plain lists.
    data = np.asarray(data)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    cmap = plt.cm.viridis

    # Labels are divided by the largest label to land in the colormap range.
    # When the largest label is 0 (a single cluster) that would be 0/0 = NaN,
    # which the colormap renders as its "bad" colour, so divide by 1 instead.
    scale = max(unique_labels.max(), 1) if unique_labels.size else 1

    # One scatter call per cluster so each gets a matching legend entry
    for cluster in unique_labels:
        members = data[labels == cluster]
        plt.scatter(
            members[:, 0],
            members[:, 1],
            c=[cmap(cluster / scale)],
            label=f"Cluster {cluster + 1}",
            s=100,
        )

    plt.title(title)
    plt.xlabel("X-coordinate")
    plt.ylabel("Y-coordinate")
    plt.legend(loc='upper right')
    plt.show()

# Cut the hierarchy into three clusters with each linkage method and draw
# the resulting assignment
for method in methods:
    model = AgglomerativeClustering(n_clusters=3, linkage=method, metric='euclidean')
    assignment = model.fit_predict(data)
    plot_clusters_with_legend(data, assignment, f"Clusters ({method.capitalize()} Linkage)")

# Print each stored linkage matrix as a table, one row per merge step
merge_columns = ['Cluster 1', 'Cluster 2', 'Distance', 'Points']
for method, linkage_matrix in dendrograms.items():
    print(f"\nIntermediate Cluster Formations ({method.capitalize()} Linkage):")
    print(pd.DataFrame(linkage_matrix, columns=merge_columns))
0
Subscribe to my newsletter

Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Invoker
Invoker