In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
class AutoEncoder(nn.Module):
    """Convolutional autoencoder for single-channel images.

    The layer sizes are fixed for 28x28 inputs: two stride-2 convolutions
    shrink the spatial size 28 -> 14 -> 7, which yields the ``64 * 7 * 7``
    features the bottleneck linear layer expects. The decoder mirrors the
    encoder and ends in a sigmoid, so reconstructions lie in [0, 1].

    Args:
        latent_dim: Width of the bottleneck vector produced by the encoder.
    """

    def __init__(self, latent_dim=64):
        super().__init__()
        feature_shape = (64, 7, 7)  # (channels, height, width) after both conv stages
        flat_features = 64 * 7 * 7

        # Encoder: strided convolutions downsample, then a linear layer
        # projects the flattened feature map onto the latent vector.
        encoder_layers = [
            nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(),
            nn.Flatten(),
            nn.Linear(flat_features, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: expand the latent vector back to a feature map, then
        # upsample with transposed convolutions; output_padding=1 makes each
        # stage exactly double the spatial size (7 -> 14 -> 28).
        decoder_layers = [
            nn.Linear(latent_dim, flat_features),
            nn.LeakyReLU(),
            nn.Unflatten(1, feature_shape),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(reconstruction, latent)`` tuple: the decoder output and the
            encoder's bottleneck vector for the same batch.
        """
        latent = self.encoder(x)
        return self.decoder(latent), latent

In [3]:
# Data preparation
# Seed torch's global RNG before the model and loaders are used so that weight
# initialisation and the shuffle order are repeatable across kernel restarts.
# NOTE: some GPU kernels are non-deterministic, so CUDA runs may still differ slightly.
SEED = 42
torch.manual_seed(SEED)

transform = transforms.ToTensor()
mnist_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True)
# Load the MNIST test set
mnist_test = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(mnist_test, batch_size=256, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoEncoder(latent_dim=64).to(device)
criterion = nn.MSELoss()  # mean squared reconstruction error over all pixels in the batch
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for imgs, _ in train_loader:  # digit labels are not needed for reconstruction
        imgs = imgs.to(device)
        optimizer.zero_grad()
        recon_imgs, _ = model(imgs)
        loss = criterion(recon_imgs, imgs)
        loss.backward()
        optimizer.step()
        # criterion returns a per-batch mean; weight it by the batch size so the
        # smaller final batch does not count as much as a full one.
        total_loss += loss.item() * imgs.size(0)
    # Dividing by the dataset size gives the true per-sample mean for the epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(mnist_train):.4f}")

Epoch 1, Loss: 0.0280
Epoch 2, Loss: 0.0053
Epoch 3, Loss: 0.0037
Epoch 4, Loss: 0.0031
Epoch 5, Loss: 0.0028
Epoch 6, Loss: 0.0025
Epoch 7, Loss: 0.0024
Epoch 8, Loss: 0.0023
Epoch 9, Loss: 0.0022
Epoch 10, Loss: 0.0021
Epoch 11, Loss: 0.0020
Epoch 12, Loss: 0.0020
Epoch 13, Loss: 0.0019
Epoch 14, Loss: 0.0019
Epoch 15, Loss: 0.0018
Epoch 16, Loss: 0.0018
Epoch 17, Loss: 0.0018
Epoch 18, Loss: 0.0017
Epoch 19, Loss: 0.0017
Epoch 20, Loss: 0.0017


In [4]:
from sklearn.cluster import KMeans
import numpy as np

In [9]:
# Encode every training image and collect the latent codes alongside the digit labels
model.eval()  # switch the model to evaluation mode for the forward passes below
latent_vectors, labels = [], []
embed_loader = DataLoader(mnist_train, batch_size=256, shuffle=False)  # fixed order: codes and labels stay aligned
with torch.no_grad():  # no gradients needed when only reading the bottleneck
    for batch_imgs, batch_lbls in embed_loader:
        _, codes = model(batch_imgs.to(device))
        latent_vectors.append(codes.cpu().numpy())
        labels.extend(batch_lbls.numpy())

In [None]:
# Stack the per-batch arrays into a single (num_samples, latent_dim) matrix.
# The guard makes this cell safe to re-run: once `latent_vectors` is already an
# ndarray, concatenating it along axis 0 would treat each row as a separate
# array and silently flatten the matrix to 1-D.
if isinstance(latent_vectors, list):
    latent_vectors = np.concatenate(latent_vectors, axis=0)

In [None]:
# Apply K-means clustering in the latent space
from sklearn.metrics import adjusted_rand_score

# random_state pins the centroid initialisation so cluster ids are repeatable.
kmeans = KMeans(n_clusters=50, random_state=42)
cluster_labels = kmeans.fit_predict(latent_vectors)

# The saved output of this cell reports an Adjusted Rand Index, but the code that
# produced it was missing; compute it here so a fresh run reproduces that line.
# ARI compares the cluster assignment with the true digit labels.
print(f"Adjusted Rand Index: {adjusted_rand_score(labels, cluster_labels):.4f}")

Adjusted Rand Index: 0.2315


In [13]:
from collections import defaultdict, Counter
import numpy as np

# --- Step 1: Label every cluster with the most frequent true digit among its members ---
cluster_label_count = defaultdict(list)
for cluster_id, digit in zip(cluster_labels, labels):
    cluster_label_count[cluster_id].append(digit)

# most_common(1) yields [(label, count)]; only the label is kept
cluster_to_label = {
    cluster_id: Counter(members).most_common(1)[0][0]
    for cluster_id, members in cluster_label_count.items()
}

# --- Step 2: Score the cluster -> label mapping on the training set ---
train_preds = [cluster_to_label[cluster_id] for cluster_id in cluster_labels]
train_accuracy = np.mean(np.asarray(train_preds) == np.asarray(labels))
print("Training Classification Accuracy: {:.4f}".format(train_accuracy))

# --- Step 3: Embed the test set with the trained model ---
model.eval()  # Set model to evaluation mode
test_latent_vectors, test_labels = [], []
with torch.no_grad():
    for batch_imgs, batch_lbls in test_loader:
        _, codes = model(batch_imgs.to(device))
        test_latent_vectors.append(codes.cpu().numpy())
        test_labels.extend(batch_lbls.numpy())
test_latent_vectors = np.concatenate(test_latent_vectors, axis=0)

# --- Step 4: Assign test points with the already-fitted KMeans and reuse the same mapping ---
test_cluster_labels = kmeans.predict(test_latent_vectors)
test_preds = [cluster_to_label[cluster_id] for cluster_id in test_cluster_labels]
test_accuracy = np.mean(np.asarray(test_preds) == np.asarray(test_labels))
print("Testing Classification Accuracy: {:.4f}".format(test_accuracy))

Training Classification Accuracy: 0.8448
Testing Classification Accuracy: 0.8516
