# UML 4m

# 3 min read
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# Build the synthetic demo dataset: 300 samples drawn around 3 blob centres.
# The label array returned by make_blobs is discarded.
np.random.seed(42)
data, _ = make_blobs(
    n_samples=300,
    centers=3,
    cluster_std=1.0,
    random_state=42,
)
# Mean shift implementation
def mean_shift(data, bandwidth=2.0, max_iter=300, tol=1e-3):
    """Shift every point toward the mean of its neighbours until it settles.

    On each iteration every point is replaced by the unweighted mean (flat
    kernel) of all current points lying strictly closer than ``bandwidth``
    to it. Neighbourhoods are taken from the positions produced by the
    previous iteration, not from the original ``data``.

    Args:
        data: Array-like of shape (n_samples, n_features). It is copied,
            never modified.
        bandwidth: Neighbourhood radius (Euclidean distance, strict ``<``).
            Must be positive so that each point is its own neighbour.
        max_iter: Maximum number of iterations to run.
        tol: Stop once the norm of the difference between two consecutive
            position arrays (taken over all points at once) is below this.

    Returns:
        A tuple ``(final_points, shifts)``. ``final_points`` is a float array
        with the last computed positions (a float copy of ``data`` when no
        iteration ran). ``shifts`` is a list holding the position array
        produced by each iteration, in order, including the last one.

    Raises:
        ValueError: If ``bandwidth`` is not positive.
    """
    if bandwidth <= 0:
        # A non-positive radius leaves every neighbourhood empty, which would
        # silently turn all positions into NaN.
        raise ValueError("bandwidth must be positive")

    # Work on a float copy so the caller's array is left untouched and
    # integer input behaves the same as float input.
    points = np.array(data, dtype=float)
    shifts = []  # Positions after each iteration, kept for visualization

    for _ in range(max_iter):
        # Mean of the points within the bandwidth of each point
        new_points = np.array([
            points[np.linalg.norm(points - point, axis=1) < bandwidth].mean(axis=0)
            for point in points
        ])
        shifts.append(new_points)

        # Convergence check on the total movement of this iteration
        converged = np.linalg.norm(new_points - points) < tol
        points = new_points
        if converged:
            break

    # `points` is always bound, even when max_iter is 0.
    return points, shifts
# Apply mean shift to the synthetic data
bandwidth = 3.0
final_points, all_shifts = mean_shift(data, bandwidth=bandwidth)

# Plotting: one panel per iteration.
# squeeze=False makes subplots always return a 2-D array of axes, so indexing
# works even when the algorithm stopped after a single iteration.
fig, axes_grid = plt.subplots(1, len(all_shifts), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]
for i, shift in enumerate(all_shifts):
    ax[i].scatter(data[:, 0], data[:, 1], c='lightgray', label='Original Data')
    ax[i].scatter(shift[:, 0], shift[:, 1], c='red', label='Shifted Centroids')
    ax[i].set_title(f"Iteration {i+1}")
    ax[i].legend()
    ax[i].set_xlim(-10, 10)
    ax[i].set_ylim(-10, 10)
plt.tight_layout()
plt.show()

# Final clustered points visualization: original data in gray, the positions
# returned by mean_shift in red.
plt.figure(figsize=(8, 6))
plt.scatter(data[:, 0], data[:, 1], c='lightgray', label='Original Data')
plt.scatter(final_points[:, 0], final_points[:, 1], c='red', label='Final Centroids')
plt.title("Final Result of Mean Shift")
plt.legend()
plt.show()
# Case study: Customer Segmentation
# Creating sample data for the case study. Each row is
# [annual income in $K, spending score], matching the axis labels below.
np.random.seed(42)  # The data is hard-coded; the seed only resets NumPy's global RNG.
customer_data = np.array([
    [65, 70], [62, 65], [70, 75], [75, 80], [80, 85],
    [20, 15], [25, 20], [30, 25], [35, 30], [40, 35],
    [45, 85], [50, 80], [55, 75], [60, 70], [65, 65],
    [85, 10], [90, 15], [95, 20], [100, 25], [105, 30],
])

# Visualize the initial data
plt.figure(figsize=(8, 6))
plt.scatter(customer_data[:, 0], customer_data[:, 1], c='blue', label='Customer Data')
plt.title("Initial Customer Data")
plt.xlabel("Annual Income (in $K)")
plt.ylabel("Spending Score (1-100)")
plt.legend()
plt.grid()
plt.show()

# Implementing the mean shift algorithm
bandwidth = 10.0
final_points, all_shifts = mean_shift(customer_data, bandwidth=bandwidth)

# Visualize the progressive shifts: one panel per iteration.
# squeeze=False makes subplots always return a 2-D array of axes, so indexing
# works even when the algorithm stopped after a single iteration.
fig, axes_grid = plt.subplots(1, len(all_shifts), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]
for i, shift in enumerate(all_shifts):
    ax[i].scatter(customer_data[:, 0], customer_data[:, 1], c='lightgray', label='Customer Data')
    ax[i].scatter(shift[:, 0], shift[:, 1], c='red', label='Shifted Centroids')
    ax[i].set_title(f"Iteration {i+1}")
    ax[i].set_xlim(10, 110)
    ax[i].set_ylim(0, 100)
    ax[i].set_xlabel("Annual Income(in $K)")
    ax[i].set_ylabel("Spending Score (1-100)")
    ax[i].legend()
plt.tight_layout()
plt.show()

# Final clustered points visualization: customer data in gray, the positions
# returned by mean_shift in red.
plt.figure(figsize=(8, 6))
plt.scatter(customer_data[:, 0], customer_data[:, 1], c='lightgray', label='Customer Data')
plt.scatter(final_points[:, 0], final_points[:, 1], c='red', label='Final Centroids')
plt.title("Final Result of Mean Shift - Customer Segmentation")
plt.xlabel("Annual Income(in $K)")
plt.ylabel("Spending Score (1-100)")
plt.legend()
plt.grid()
plt.show()
# Case study: City Traffic Analysis
# Creating sample data for the case study. Each row is
# [traffic density in vehicles/hour, average speed in km/hr], matching the
# axis labels below.
np.random.seed(42)  # The data is hard-coded; the seed only resets NumPy's global RNG.
traffic_data = np.array([
    # Cluster 1: High-density urban areas
    [500, 40], [520, 42], [480, 38], [510, 41], [495, 39], [530, 45],
    [540, 43], [520, 44],
    # Cluster 2: Medium-density suburban areas
    [300, 30], [320, 28], [310, 32], [305, 29], [315, 31], [290, 27],
    [325, 33], [310, 30],
    # Cluster 3: Low-density rural areas
    [700, 50], [720, 52], [710, 48], [705, 49], [715, 51], [730, 54],
    [740, 53], [725, 50],
    # Cluster 4: Congested zones
    [200, 20], [220, 22], [210, 18], [215, 19], [205, 21], [190, 17],
    [225, 23], [210, 20],
    # Cluster 5: Highway zones
    [800, 60], [820, 62], [810, 58], [805, 59], [815, 61], [830, 64],
    [840, 63], [825, 60],
    # Cluster 6: Moderate-density mixed zones
    [400, 35], [420, 37], [410, 33], [405, 36], [415, 34], [430, 39],
    [440, 38], [425, 36],
])

# Visualize the initial data.
# Only the raw traffic points are drawn here: no centroids exist for this
# dataset until mean_shift runs below.
plt.figure(figsize=(8, 6))
plt.scatter(traffic_data[:, 0], traffic_data[:, 1], c='blue', label='Traffic Data')
plt.title("Initial Traffic data")
plt.xlabel("Traffic Density (vehicles/hour)")
plt.ylabel("Average speed(km/hr)")
plt.legend()
plt.grid()
plt.show()

# Implementing the mean shift algorithm
bandwidth = 30.0
final_points, all_shifts = mean_shift(traffic_data, bandwidth=bandwidth)

# Visualize the progressive shifts: one panel per iteration.
# squeeze=False makes subplots always return a 2-D array of axes, so indexing
# works even when the algorithm stopped after a single iteration.
fig, axes_grid = plt.subplots(1, len(all_shifts), figsize=(20, 5), squeeze=False)
ax = axes_grid[0]
for i, shift in enumerate(all_shifts):
    ax[i].scatter(traffic_data[:, 0], traffic_data[:, 1], c='lightgray', label='traffic Data')
    ax[i].scatter(shift[:, 0], shift[:, 1], c='red', label='Shifted Centroids')
    ax[i].set_title(f"Iteration {i+1}")
    # Limits cover the full data range (density 190-840, speed 17-64) so that
    # every cluster, including the 800+ vehicles/hour one, stays visible.
    ax[i].set_xlim(150, 900)
    ax[i].set_ylim(10, 70)
    ax[i].set_xlabel("Traffic Density (vehicles/hour)")
    ax[i].set_ylabel("Average speed(km/hr)")
    ax[i].legend()
plt.tight_layout()
plt.show()

# Final clustered points visualization: traffic data in gray, the positions
# returned by mean_shift in red.
plt.figure(figsize=(8, 6))
plt.scatter(traffic_data[:, 0], traffic_data[:, 1], c='lightgray', label='traffic Data')
plt.scatter(final_points[:, 0], final_points[:, 1], c='red', label='Final Centroids')
plt.title("Final Result of Mean Shift - Traffic analysis")
plt.xlabel("Traffic Density (vehicles/hour)")
plt.ylabel("Average speed(km/hr)")
plt.legend()
plt.grid()
plt.show()
# 0
# Subscribe to my newsletter
# Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.
# Written by
