<a href="https://colab.research.google.com/github/vifirsanova/hse-python-course/blob/main/compression/pruning_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils.prune as prune
import torchvision
import torchvision.transforms as transforms

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [4]:
class MNISTModel(nn.Module):
    def __init__(self):
        super(MNISTModel, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 300)
        self.fc2 = nn.Linear(300, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten the image
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [5]:
model = MNISTModel()

In [6]:
def train_model(model, trainloader, epochs=5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in trainloader:
            # Move data to the device (GPU or CPU)
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(trainloader):.4f}")

In [7]:
def test_model(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)  # Move to GPU/CPU
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

In [8]:
model = MNISTModel().to(device)  # Move the model to GPU/CPU
print("Training before pruning:")
train_model(model, trainloader, epochs=5)

Training before pruning:
Epoch [1/5], Loss: 0.3999
Epoch [2/5], Loss: 0.1636
Epoch [3/5], Loss: 0.1177
Epoch [4/5], Loss: 0.0904
Epoch [5/5], Loss: 0.0751


In [9]:
print("Testing before pruning:")
accuracy_before_pruning = test_model(model, testloader)

Testing before pruning:
Accuracy: 97.41%


In [10]:
def print_weight_statistics(layer, name):
    nonzero = torch.count_nonzero(layer.weight)
    total = layer.weight.numel()
    sparsity = 100 - (nonzero / total * 100)
    print(f"{name}: Non-zero weights: {nonzero}, Total weights: {total}, Sparsity: {sparsity:.2f}%")
    print(f"Sample of weights from {name}: {layer.weight.flatten()[:10]}")

In [11]:
print("Weight statistics before pruning:")
print_weight_statistics(model.fc1, "fc1")
print_weight_statistics(model.fc2, "fc2")

Weight statistics before pruning:
fc1: Non-zero weights: 235200, Total weights: 235200, Sparsity: 0.00%
Sample of weights from fc1: tensor([ 0.0263,  0.0347,  0.0025, -0.0264, -0.0272,  0.0300,  0.0090, -0.0043,
        -0.0122,  0.0257], device='cuda:0', grad_fn=<SliceBackward0>)
fc2: Non-zero weights: 30000, Total weights: 30000, Sparsity: 0.00%
Sample of weights from fc2: tensor([ 0.0817, -0.0483, -0.0303, -0.0522,  0.0482, -0.0035, -0.0059,  0.0037,
        -0.1134,  0.0235], device='cuda:0', grad_fn=<SliceBackward0>)


In [12]:
def check_sparsity(layer):
    nonzero = torch.count_nonzero(layer.weight)
    total = layer.weight.numel()
    print(f"Sparsity of {layer}: {100 - (nonzero / total * 100):.2f}%")

In [13]:
prune.l1_unstructured(model.fc1, name='weight', amount=0.5)
prune.l1_unstructured(model.fc2, name='weight', amount=0.3)

check_sparsity(model.fc1)
check_sparsity(model.fc2)

Sparsity of Linear(in_features=784, out_features=300, bias=True): 50.00%
Sparsity of Linear(in_features=300, out_features=100, bias=True): 30.00%


In [14]:
print("\nApplying pruning to fc1 (50%) and fc2 (30%)...")
prune.l1_unstructured(model.fc1, name='weight', amount=0.5)
prune.l1_unstructured(model.fc2, name='weight', amount=0.3)


Applying pruning to fc1 (50%) and fc2 (30%)...


Linear(in_features=300, out_features=100, bias=True)

In [15]:
print("Retraining after pruning:")
train_model(model, trainloader, epochs=5)

Retraining after pruning:
Epoch [1/5], Loss: 0.0504
Epoch [2/5], Loss: 0.0404
Epoch [3/5], Loss: 0.0354
Epoch [4/5], Loss: 0.0301
Epoch [5/5], Loss: 0.0258


In [16]:
print("Testing after pruning:")
accuracy_after_pruning = test_model(model, testloader)

Testing after pruning:
Accuracy: 98.01%


In [17]:
print("\nWeight statistics after pruning:")
print_weight_statistics(model.fc1, "fc1")
print_weight_statistics(model.fc2, "fc2")


Weight statistics after pruning:
fc1: Non-zero weights: 58800, Total weights: 235200, Sparsity: 75.00%
Sample of weights from fc1: tensor([0.0000, 0.0294, 0.0000, -0.0000, -0.0000, 0.0000, 0.0000, -0.0000, -0.0000,
        0.0000], device='cuda:0')
fc2: Non-zero weights: 14700, Total weights: 30000, Sparsity: 51.00%
Sample of weights from fc2: tensor([ 0.0633, -0.0525, -0.0000, -0.0544,  0.0554, -0.0000, -0.0000,  0.0000,
        -0.1676,  0.0000], device='cuda:0')


In [18]:
model.fc1.weight

tensor([[ 0.0000,  0.0294,  0.0000,  ..., -0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.0286,  0.0000,  ..., -0.0287, -0.0000, -0.0000],
        [ 0.0000, -0.0000, -0.0000,  ...,  0.0000,  0.0000, -0.0000],
        ...,
        [-0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0000],
        [ 0.0355,  0.0000,  0.0333,  ...,  0.0000,  0.0000, -0.0000],
        [ 0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.0000, -0.0425]],
       device='cuda:0')

In [19]:
model.fc2.weight

tensor([[ 0.0633, -0.0525, -0.0000,  ...,  0.0000, -0.0437,  0.0272],
        [ 0.0748, -0.0000,  0.0686,  ...,  0.0000, -0.0769,  0.0842],
        [-0.0000, -0.0000,  0.0000,  ..., -0.0000,  0.0000,  0.1571],
        ...,
        [ 0.0000, -0.0451, -0.0000,  ...,  0.0616,  0.0000,  0.0262],
        [-0.0000,  0.0000, -0.0258,  ..., -0.0000, -0.0970, -0.1338],
        [ 0.0729,  0.0000, -0.0000,  ..., -0.1437, -0.0000,  0.0392]],
       device='cuda:0')