import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Synthetic data set: 300 samples scattered around 3 blob centres, each with a
# standard deviation of 1.0. Both NumPy's global RNG and make_blobs itself are
# seeded with 42 so that every run produces the same points.
np.random.seed(42)
data, _ = make_blobs(
    n_samples=300,
    centers=3,
    cluster_std=1.0,
    random_state=42,
)

# Mean shift implementation
def mean_shift(data, bandwidth=2.0, max_iter=300, tol=1e-3):
    """Iteratively move every point to the mean of its neighbours.

    On each iteration every point is replaced by the plain (unweighted)
    average of all points whose Euclidean distance to it is strictly less
    than ``bandwidth``. Neighbours are looked up among the *current*
    (already shifted) positions, not among the original samples, and all
    points are updated simultaneously from the previous iteration's state.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Input samples. The input is copied and never modified.
    bandwidth : float, default 2.0
        Neighbourhood radius; must be positive.
    max_iter : int, default 300
        Maximum number of iterations to run.
    tol : float, default 1e-3
        Iteration stops once the Frobenius norm of the total displacement
        of all points in one iteration drops below this value.

    Returns
    -------
    final_points : numpy.ndarray
        Positions after the last iteration (a float copy of ``data`` when
        ``max_iter`` is 0).
    shifts : list of numpy.ndarray
        Positions after each iteration, in order, for visualisation.

    Raises
    ------
    ValueError
        If ``bandwidth`` is not positive (every neighbourhood would be
        empty and the mean undefined).
    """
    if bandwidth <= 0:
        raise ValueError("bandwidth must be positive")

    # np.array copies, so the caller's data is untouched; the float dtype also
    # lets plain nested lists and integer arrays be used as input.
    points = np.array(data, dtype=float)
    shifts = []  # To store the progressive shifts for visualization

    for _ in range(max_iter):
        # A fresh array per iteration: every entry of `shifts` stays distinct
        # and all means are computed from the previous iteration's positions.
        new_points = np.empty_like(points)
        for idx, point in enumerate(points):
            # Unweighted mean of the points lying within the bandwidth
            distances = np.linalg.norm(points - point, axis=1)
            new_points[idx] = points[distances < bandwidth].mean(axis=0)
        shifts.append(new_points)

        # Convergence check: overall displacement during this iteration
        converged = np.linalg.norm(new_points - points) < tol
        points = new_points
        if converged:
            break

    # `points` is always bound, even when the loop body never ran.
    return points, shifts

# Apply mean shift to the synthetic blob data
bandwidth = 3.0
final_points, all_shifts = mean_shift(data, bandwidth=bandwidth)

# Axis limits are taken from the data itself (plus a 5% margin) instead of a
# fixed window, so no sample is clipped. Every shifted point is an average of
# data points, hence it always lies inside the data's bounding box.
lower = data[:, :2].min(axis=0)
upper = data[:, :2].max(axis=0)
margin = 0.05 * (upper - lower)
x_limits = (lower[0] - margin[0], upper[0] + margin[0])
y_limits = (lower[1] - margin[1], upper[1] + margin[1])

# Plotting: one panel per iteration.
# squeeze=False makes subplots always return a 2-D array of Axes; without it a
# single-iteration run yields a bare Axes object and ax[i] would fail.
fig, axes_grid = plt.subplots(1, len(all_shifts), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]
for i, shift in enumerate(all_shifts):
    ax[i].scatter(data[:, 0], data[:, 1], c='lightgray', label='Original Data')
    ax[i].scatter(shift[:, 0], shift[:, 1], c='red', label='Shifted Centroids')
    ax[i].set_title(f"Iteration {i+1}")
    ax[i].legend()
    ax[i].set_xlim(*x_limits)
    ax[i].set_ylim(*y_limits)
plt.tight_layout()
plt.show()

# Final clustered points visualization: original samples in gray, converged
# positions in red.
plt.figure(figsize=(8, 6))
plt.scatter(data[:, 0], data[:, 1], c='lightgray', label='Original Data')
plt.scatter(final_points[:, 0], final_points[:, 1], c='red', label='Final Centroids')
plt.title("Final Result of Mean Shift")
plt.legend()
plt.show()

# Case study: Customer Segmentation
# Sample data for the case study: one row per customer, columns are
# [annual income in $K, spending score] (see the axis labels below).
np.random.seed(42)
customer_data = np.array([
    [65,70],[62,65],[70,75],[75,80],[80,85],
    [20,15],[25,20],[30,25],[35,30],[40,35],
    [45,85],[50,80],[55,75],[60,70],[65,65],
    [85,10],[90,15],[95,20],[100,25],[105,30]
])

# Visualize the initial data
plt.figure(figsize=(8, 6))
plt.scatter(customer_data[:, 0], customer_data[:, 1], c='blue', label='Customer Data')
plt.title("Initial Customer Data")
plt.xlabel("Annual Income (in $K)")
plt.ylabel("Spending Score (1-100)")
plt.legend()
plt.grid()
plt.show()

# Run the mean shift algorithm on the customer data
bandwidth = 10.0
final_points, all_shifts = mean_shift(customer_data, bandwidth=bandwidth)

# Visualize the progressive shifts: one panel per iteration.
# squeeze=False makes subplots always return a 2-D array of Axes; without it a
# single-iteration run yields a bare Axes object and ax[i] would fail.
fig, axes_grid = plt.subplots(1, len(all_shifts), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]
for i, shift in enumerate(all_shifts):
    ax[i].scatter(customer_data[:, 0], customer_data[:, 1], c='lightgray', label='Customer Data')
    ax[i].scatter(shift[:, 0], shift[:, 1], c='red', label='Shifted Centroids')
    ax[i].set_title(f"Iteration {i+1}")
    # Fixed window: the sample data spans 20-105 on x and 10-85 on y.
    ax[i].set_xlim(10, 110)
    ax[i].set_ylim(0, 100)
    ax[i].set_xlabel("Annual Income(in $K)")
    ax[i].set_ylabel("Spending Score (1-100)")
    ax[i].legend()
plt.tight_layout()
plt.show()

# Final clustered points visualization: original customers in gray, converged
# positions in red.
plt.figure(figsize=(8, 6))
plt.scatter(customer_data[:, 0], customer_data[:, 1], c='lightgray', label='Customer Data')
plt.scatter(final_points[:, 0], final_points[:, 1], c='red', label='Final Centroids')
plt.title("Final Result of Mean Shift - Customer Segmentation")
plt.xlabel("Annual Income(in $K)")
plt.ylabel("Spending Score (1-100)")
plt.legend()
plt.grid()
plt.show()

# Case study: City Traffic Analysis
# Sample data for the case study: one row per observation, columns are
# [traffic density in vehicles/hour, average speed in km/hr] (see axis labels).
np.random.seed(42)
traffic_data = np.array([
    #Cluster 1: High-density urban areas
    [500, 40], [520, 42], [480, 38], [510, 41], [495, 39], [530, 45],
    [540, 43], [520,44],
    #Cluster 2: Medium-density suburban areas
    [300,30],[320,28],[310,32],[305,29],[315,31],[290,27],
    [325,33],[310,30],
    #Cluster 3: Low-density rural areas
    [700,50],[720,52],[710,48],[705,49],[715,51],[730,54],
    [740,53],[725,50],
    #Cluster 4: Congested zones
    [200,20],[220,22],[210,18],[215,19],[205,21],[190,17],
    [225,23],[210,20],
    #Cluster 5: Highway zones
    [800,60],[820,62],[810,58],[805,59],[815,61],[830,64],
    [840,63],[825,60],
    #Cluster 6: Moderate-density mixed zones
    [400,35],[420,37],[410,33],[405,36],[415,34],[430,39],
    [440,38],[425,36]
])

# Visualize the initial data.
# Only the raw observations are drawn here: mean shift has not been run on the
# traffic data yet, so there are no traffic centroids to show at this point.
plt.figure(figsize=(8, 6))
plt.scatter(traffic_data[:, 0], traffic_data[:, 1], c='blue', label='Traffic Data')
plt.title("Initial Traffic data")
plt.xlabel("Traffic Density (vehicles/hour)")
plt.ylabel("Average speed(km/hr)")
plt.legend()
plt.grid()
plt.show()

# Run the mean shift algorithm on the traffic data
bandwidth = 30.0
final_points, all_shifts = mean_shift(traffic_data, bandwidth=bandwidth)

# Axis limits are taken from the data itself (plus a 5% margin) so that every
# cluster, including the one at 800-840 vehicles/hour, is inside the view.
# Every shifted point is an average of data points, hence it always lies
# inside the data's bounding box.
lower = traffic_data[:, :2].min(axis=0)
upper = traffic_data[:, :2].max(axis=0)
margin = 0.05 * (upper - lower)
x_limits = (lower[0] - margin[0], upper[0] + margin[0])
y_limits = (lower[1] - margin[1], upper[1] + margin[1])

# Visualize the progressive shifts: one panel per iteration.
# squeeze=False makes subplots always return a 2-D array of Axes; without it a
# single-iteration run yields a bare Axes object and ax[i] would fail.
fig, axes_grid = plt.subplots(1, len(all_shifts), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]
for i, shift in enumerate(all_shifts):
    ax[i].scatter(traffic_data[:, 0], traffic_data[:, 1], c='lightgray', label='traffic Data')
    ax[i].scatter(shift[:, 0], shift[:, 1], c='red', label='Shifted Centroids')
    ax[i].set_title(f"Iteration {i+1}")
    ax[i].set_xlim(*x_limits)
    ax[i].set_ylim(*y_limits)
    ax[i].set_xlabel("Traffic Density (vehicles/hour)")
    ax[i].set_ylabel("Average speed(km/hr)")
    ax[i].legend()
plt.tight_layout()
plt.show()

# Final clustered points visualization: original observations in gray,
# converged positions in red.
plt.figure(figsize=(8, 6))
plt.scatter(traffic_data[:, 0], traffic_data[:, 1], c='lightgray', label='traffic Data')
plt.scatter(final_points[:, 0], final_points[:, 1], c='red', label='Final Centroids')
plt.title("Final Result of Mean Shift - Traffic analysis")
plt.xlabel("Traffic Density (vehicles/hour)")
plt.ylabel("Average speed(km/hr)")
plt.legend()
plt.grid()
plt.show()
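# --- Non-code residue from the source web page (commented out so the script parses) ---
# UML 4m
#
# Subscribe to my newsletter
#
# Invoker
#
# Invoker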