Uml Tsne

Invoker
2 min read
#Visualisation for the t-SNE process UML

#We compute pairwise Euclidean distances in the high-dimensional space
#We estimate probability distributions in the high-dimensional space based on the nearest neighbors and their distances
#We use scikit-learn's TSNE to perform the t-SNE transformation to a 2D space
#We visualize the original high-dimensional space and the t-SNE-transformed low-dimensional space side by side

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.manifold import TSNE

# Load the Iris dataset: feature matrix X and integer class labels y
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Step 1: Pairwise Euclidean distances in the high-dimensional space.
# Broadcasting X[:, np.newaxis] against X gives every row-vs-row difference;
# the norm over the last axis collapses that to an (n, n) distance matrix.
proximity_matrix = np.linalg.norm(X[:, np.newaxis] - X, axis=-1)

# Step 2: Probability Distribution in High-Dimensional Space
# Simplified illustration of t-SNE's conditional probabilities: every point
# keeps its `perplexity` nearest neighbours, weighted by a Gaussian kernel
# with sigma = 1, i.e. p(j|i) proportional to exp(-d_ij**2 / 2).
# NOTE: full t-SNE tunes a per-point bandwidth to match the perplexity; this
# matrix is for illustration only -- the TSNE estimator below is fitted on X
# directly and never receives P_high_dim.
perplexity = 30
P_high_dim = np.zeros_like(proximity_matrix)

# Mask the diagonal so a point is never its own neighbour. Relying on the
# point sorting first is unsafe: duplicate rows tie at distance 0 and argsort
# gives no guarantee about which of the tied indices comes first.
neighbor_distances = proximity_matrix.copy()
np.fill_diagonal(neighbor_distances, np.inf)

# A point has at most n - 1 neighbours, however large the perplexity is.
n_neighbors = min(perplexity, len(X) - 1)

for i, row in enumerate(neighbor_distances):
    indices = np.argsort(row)[:n_neighbors]
    if indices.size == 0:
        continue  # no neighbours available (single-sample input)
    squared = row[indices] ** 2
    # Shifting by the smallest squared distance cancels out in the
    # normalisation but keeps exp() from underflowing to an all-zero row.
    weights = np.exp(-(squared - squared.min()) / 2.0)
    P_high_dim[i, indices] = weights / weights.sum()

# Step 3: Embed into 2D with scikit-learn's t-SNE.
# fit_transform runs the complete optimisation (not just the initialisation)
# and returns the final low-dimensional coordinates.
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
X_low_dim = tsne.fit_transform(X)

# Visualization: one 12x6 figure holding two panels side by side
iris_fig, (ax_original, ax_embedded) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: the first two raw feature columns, coloured by class label
ax_original.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k')
ax_original.set_title('Original High-Dimensional Space')
ax_original.set_xlabel('Feature 1')
ax_original.set_ylabel('Feature 2')

# Right panel: the two t-SNE components, coloured the same way
ax_embedded.scatter(
    X_low_dim[:, 0], X_low_dim[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k'
)
ax_embedded.set_title('t-SNE Visualisation')
ax_embedded.set_xlabel('t-SNE Component 1')
ax_embedded.set_ylabel('t-SNE Component 2')

# Tighten the spacing between the panels, then display the figure
iris_fig.tight_layout()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.datasets import fetch_openml
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler

# Step 1: Load MNIST dataset
# as_frame=False requests plain NumPy arrays, so the slicing and plotting
# below work positionally and do not depend on pandas objects being returned.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data.astype(np.float32)  # Feature matrix (70,000 samples, 784 features)
y = mnist.target.astype(int)  # Labels (0 to 9), converted from strings to ints

# Step 2: Normalize data for better performance
# Each pixel column is standardised using statistics from the full dataset.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Use only a subset of data for faster computation
# The first `subset_size` rows and their labels are kept, in dataset order.
subset_size = 2000  # Reduce sample size for faster visualization
X_subset, y_subset = X_scaled[:subset_size], y[:subset_size]

# Step 4: Project the 784-dimensional subset down to 2 dimensions
tsne_2d = TSNE(
    n_components=2,
    random_state=42,
    perplexity=30,
    learning_rate=200,
)
X_2d = tsne_2d.fit_transform(X_subset)

# Step 5: Scatter the 2D embedding, one colour per digit label
fig_2d, ax_2d = plt.subplots(figsize=(10,8))
scatter = ax_2d.scatter(
    X_2d[:,0],
    X_2d[:, 1],
    c=y_subset,
    cmap='tab10',
    alpha=0.7,
    edgecolors='k',
)
fig_2d.colorbar(scatter, ax=ax_2d, label="Digit Label")
ax_2d.set_title("t-SNE Visualization of MNIST in 2D")
ax_2d.set_xlabel("t-SNE Component 1")
ax_2d.set_ylabel("t-SNE Component 2")
plt.show()

# Step 6: Project the 784-dimensional subset down to 3 dimensions
tsne_3d = TSNE(
    n_components=3,
    random_state=42,
    perplexity=30,
    learning_rate=200,
)
X_3d = tsne_3d.fit_transform(X_subset)

# Step 7: Scatter the 3D embedding on a 3D axes, one colour per digit label
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(1, 1, 1, projection='3d')
component_1, component_2, component_3 = X_3d[:, 0], X_3d[:, 1], X_3d[:, 2]
scatter = ax.scatter(
    component_1,
    component_2,
    component_3,
    c=y_subset,
    cmap='tab10',
    alpha=0.7,
    edgecolors='k',
)

# Title and axis labels for the 3D plot, applied in a single call
ax.set(
    title="t-SNE Visualisation of MNIST in 3D",
    xlabel="t-SNE Component 1",
    ylabel="t-SNE Component 2",
    zlabel="t-SNE Component 3",
)
fig.colorbar(scatter, ax=ax, label="Digit Label")
plt.show()
0
Subscribe to my newsletter

Read articles from Invoker directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Invoker
Invoker