# Singular Value Decomposition as Part of a Neural Network


In [1]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Run everything in 64-bit floats and fix the RNG seed so runs are repeatable.
torch.set_default_dtype(torch.float64)
torch.manual_seed(0)

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [4]:
# Load MNIST dataset
# Images are converted to tensors and then normalised with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Build the training split first, then the test split, with identical settings.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 8195045.50it/s] 


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 321955.33it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 2977644.24it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2076804.62it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [6]:
# Standard model without SVD
class Standart_model(nn.Module):
    """Baseline classifier: Linear -> ReLU -> Linear, with no SVD factorisation.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of the hidden layer.
        output_size: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Registration order is kept so the printed module layout is unchanged.
        self.dense1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Flatten every sample to one row and return the output logits."""
        batch = x.shape[0]
        flat = x.view(batch, -1)
        hidden = self.relu(self.dense1(flat))
        return self.dense2(hidden)

In [7]:
# train
def train_model(model, train_loader, test_loader, epochs, ortho_weight=0.1, is_svd=False):
    """Train ``model`` with Adam and evaluate it on ``test_loader`` after every epoch.

    The model and every batch are moved to the notebook-level ``device``, which
    must be defined before this function is called.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class logits. When
            ``is_svd`` is true it must also expose an ``ortho_loss()`` method.
        train_loader: Iterable of ``(data, target)`` batches used for fitting.
        test_loader: Iterable of ``(data, target)`` batches used for evaluation.
        epochs: Number of passes over ``train_loader``.
        ortho_weight: Factor applied to ``model.ortho_loss()`` before it is
            added to the cross-entropy loss. Only used when ``is_svd`` is true.
        is_svd: Whether to add the model's ``ortho_loss()`` penalty to the loss.

    Returns:
        ``(train_losses, test_accuracies)``: two lists with one entry per epoch.
        The first holds the mean per-batch training loss (penalty included), the
        second the test accuracy in percent. An entry is ``nan`` when the
        corresponding loader produced no data, rather than raising
        ``ZeroDivisionError``.
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_losses, test_accuracies = [], []

    for epoch in range(epochs):
        # ---- optimisation pass over the training batches ----
        model.train()
        total_loss = 0.0
        num_batches = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            loss = criterion(model(data), target)

            if is_svd:
                loss = loss + ortho_weight * model.ortho_loss()

            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        # Average over the batches actually seen; an empty loader gives nan.
        train_losses.append(total_loss / num_batches if num_batches else float('nan'))

        # ---- evaluation pass, gradients disabled ----
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)

                # argmax over the class dimension replaces torch.max(output.data, 1).
                predicted = model(data).argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        # Accuracy is undefined without test samples; report nan instead of crashing.
        accuracy = 100 * correct / total if total else float('nan')
        test_accuracies.append(accuracy)

        if epoch % 10 == 0:
            print(f'Epoch {epoch}: Loss = {train_losses[-1]:.4f}, Accuracy = {accuracy:.2f}%')

    return train_losses, test_accuracies

In [8]:
# Layer sizes: flattened 28x28 input, one hidden layer, ten output logits.
input_size, hidden_size, output_size = 28 * 28, 256, 10
# Number of epochs for a full training run.
epochs = 50

In [11]:
# Train the baseline (non-SVD) network and keep its per-epoch loss / accuracy history.
print("Learn the standard model")
standard_model = Standart_model(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
standard_loss, standard_acc = train_model(
    model=standard_model,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=epochs,
)

Learn the standard model
Epoch 0: Loss = 0.3958, Accuracy = 92.41%
Epoch 10: Loss = 0.0398, Accuracy = 97.73%
Epoch 20: Loss = 0.0192, Accuracy = 97.69%
Epoch 30: Loss = 0.0111, Accuracy = 97.84%
Epoch 40: Loss = 0.0144, Accuracy = 97.79%


In [14]:
# Move the trained baseline back to the CPU; the call returns the same module
# object, so `standard_model` itself ends up on the CPU as well.
standard_model_cpu = standard_model.to('cpu')

# Show the layer layout of the baseline network.
print(standard_model)

def count_parameters(model):
    """Return the total number of elements in the parameters of ``model`` that require gradients."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable
# Report how many trainable parameters the baseline network has.
total_params = count_parameters(model=standard_model)
print(f"Total number of parameters: {total_params:,}")

Standart_model(
  (dense1): Linear(in_features=784, out_features=256, bias=True)
  (relu): ReLU()
  (dense2): Linear(in_features=256, out_features=10, bias=True)
)
Total number of parameters: 203,530


In [13]:
# NOTE(review): SVD_Model is not defined in any cell shown in this notebook, and the
# recorded output of this cell is a NameError. Define or import it above before running.
# Train the SVD-based model with the ortho_loss penalty enabled (is_svd=True).
print("Learn SVD model")
svd_model = SVD_Model(input_size, hidden_size, output_size)
svd_loss, svd_acc = train_model(
    model=svd_model,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=epochs,
    is_svd=True,
)

Learn SVD model


NameError: name 'SVD_Model' is not defined

In [None]:
# Show the layout of the SVD model and count its trainable parameters.
print(svd_model)

total_params_svd = count_parameters(model=svd_model)
print(f"Total number of parameters: {total_params_svd:,}")

In [None]:
# Prune the SVD model, then fine-tune it.
# NOTE(review): prune() presumably truncates each layer's rank based on
# threshold_ratio and returns the two new ranks; SVD_Model is not shown here, confirm.
print("Prune the SVD model")
rank1, rank2 = svd_model.prune(threshold_ratio=0.1)
print(f"New ranks after pruning: fc1={rank1}, fc2={rank2}")

# Fine-tune for half of `epochs`, still with the ortho_loss penalty enabled.
print("Fine-tune the pruned SVD model")
svd_pruned_loss, svd_pruned_acc = train_model(svd_model, train_loader, test_loader, epochs//2, is_svd=True)

In [None]:
# Accuracy recorded after the last fine-tuning epoch, followed by the model layout.
final_pruned_accuracy_index = -1
print(svd_pruned_acc[final_pruned_accuracy_index])
print(svd_model)

# Trainable parameter count of the SVD model after pruning and fine-tuning.
total_params_svd_2 = count_parameters(model=svd_model)
print(f"Total number of parameters: {total_params_svd_2:,}")