In [1]:
!pip install nni



In [1]:
# import pytorch libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torch.optim import SGD
from nni.compression.pruning import L1NormPruner
import time
import numpy as np

print(torch.__version__)

2.1.0


## 1. Pretrain a model using MNIST dataset

In [2]:
# Optional: report whether a CUDA GPU is visible to PyTorch.
# The availability flag is queried once and reused for both the message and the branch.
cuda_available = torch.cuda.is_available()
print('Cuda Available : {}'.format(cuda_available))
if cuda_available:
    # With no argument, get_device_name() reports the current CUDA device.
    print('GPU - {0}'.format(torch.cuda.get_device_name()))

Cuda Available : False


In [3]:
# # Define the CNN model,


# # Device configuration
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# # Hyperparameters
# num_epochs = 3
# batch_size = 64
# learning_rate = 0.01

# # MNIST dataset
# train_dataset = datasets.MNIST(root='data', train=True, transform=transforms.ToTensor(), download=True)
# test_dataset = datasets.MNIST(root='data', train=False, transform=transforms.ToTensor(), download=True)

# # Data loader
# train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# # Define a convolutional neural network model for MNIST
# class TorchModel(nn.Module):
#     def __init__(self):
#         super(TorchModel, self).__init__()
#         self.conv1 = nn.Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
#         self.conv2 = nn.Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
#         self.fc1 = nn.Linear(16 * 4 * 4, 120)  # Adjusted to match output of conv2
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 10)
#         self.relu1 = nn.ReLU()
#         self.relu2 = nn.ReLU()
#         self.relu3 = nn.ReLU()
#         self.relu4 = nn.ReLU()
#         self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#         self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

#     def forward(self, x):
#         x = self.relu1(self.conv1(x))
#         x = self.pool1(x)
#         x = self.relu2(self.conv2(x))
#         x = self.pool2(x)
#         x = x.view(-1, 16 * 4 * 4)
#         x = self.relu3(self.fc1(x))
#         x = self.relu4(self.fc2(x))
#         x = self.fc3(x)
#         x = F.log_softmax(x, dim=1)  # Add this line
#         return x

# model = TorchModel().to(device)
# print(model)


In [4]:
# test initialise the 16,4,4
# Define the CNN model,


# Device: use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Hyperparameters shared by the training cells below.
num_epochs = 3
batch_size = 64
learning_rate = 0.01

# MNIST train / test splits, downloaded into ./data on first use and
# converted to tensors by ToTensor.
train_dataset = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST('data', train=False, download=True, transform=transforms.ToTensor())

# Mini-batch loaders: training batches are reshuffled every epoch,
# test batches keep their fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define a convolutional neural network model for MNIST
class TorchModel(nn.Module):
    """LeNet-style CNN that classifies single-channel images into 10 classes.

    Args:
        v1: Number of output channels of the last convolution (``conv2``).
        v2: Height of the feature map that reaches ``fc1``.
        v3: Width of the feature map that reaches ``fc1``.

    ``fc1`` is built with ``v1 * v2 * v3`` input features. For 28x28 inputs
    the two 5x5 convolutions and two 2x2 poolings leave a 4x4 map, so
    ``v2 = v3 = 4`` is the matching choice (28 -> 24 -> 12 -> 8 -> 4).

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1), so
    the matching loss is a negative-log-likelihood loss.
    """

    def __init__(self, v1=16, v2=4, v3=4):
        super(TorchModel, self).__init__()
        self.v1 = v1  # Output channels of the last conv layer
        self.v2 = v2  # Height of the feature map fed to fc1
        self.v3 = v3  # Width of the feature map fed to fc1
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
        # conv2 must emit exactly v1 channels; with a hard-coded 16 any other
        # v1 made fc1's in_features disagree with the flattened feature count
        # and forward() failed with a shape mismatch.
        self.conv2 = nn.Conv2d(6, self.v1, kernel_size=(5, 5), stride=(1, 1))
        self.fc1 = nn.Linear(self.v1 * self.v2 * self.v3, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # One ReLU / pooling module per use site, so every activation shows up
        # as its own named submodule.
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.relu4 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

    def forward(self, x):
        """Map a batch of images ``x`` to per-class log-probabilities."""
        x = self.relu1(self.conv1(x))
        x = self.pool1(x)
        x = self.relu2(self.conv2(x))
        x = self.pool2(x)
        # Flatten everything except the batch dimension; the feature count is
        # inferred from the tensor instead of being hard-coded.
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        x = self.fc3(x)
        x = F.log_softmax(x, dim=1)
        return x


# Build the unpruned network (16 channels on a 4x4 feature map) and place it
# on the selected device, then show its layer structure.
model = TorchModel(v1=16, v2=4, v3=4).to(device)
print(model)

TorchModel(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (relu1): ReLU()
  (relu2): ReLU()
  (relu3): ReLU()
  (relu4): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [5]:
# Visualize the autograd graph of the unpruned model with torchviz.
from torchviz import make_dot

model.eval()  # Set the model to evaluation mode

# The dummy batch must live on the same device as the model; a CPU tensor fed
# to a CUDA model raises a device-mismatch error.
dummy_input = torch.randn(3, 1, 28, 28).to(device)

# Run a forward pass with the dummy input to get the output tensor
output = model(dummy_input)

# Generate the graph using torchviz, labelling nodes with parameter names
dot = make_dot(output, params=dict(model.named_parameters()))

# Save the visualization as a PNG file. The call is left as the last
# expression so the notebook displays the written file name.
dot.render('model_original_visualization', format='png')


'model_original_visualization.png'

In [6]:
# Save the model's parameters before compression; the file is used later
# for the on-disk size comparison.
torch.save(obj=model.state_dict(), f='original_model.pth')

In [7]:

# Loss and optimizer.
# TorchModel.forward already ends with F.log_softmax, so the network emits
# log-probabilities. NLLLoss is the loss defined on log-probabilities;
# CrossEntropyLoss would apply a second, redundant log-softmax (same value,
# extra work) and would differ from the F.nll_loss used for fine-tuning.
criterion = nn.NLLLoss()
optimizer = SGD(model.parameters(), lr=learning_rate)

# Function to train the model
def train(model, train_loader, optimizer, criterion):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Args:
        model: Network to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer built over ``model``'s parameters.
        criterion: Callable ``criterion(output, target)`` returning the loss.

    Returns:
        None. The model's parameters are modified in place.
    """
    # Take the device from the model itself instead of relying on a
    # notebook-level ``device`` global; batches then always follow the model.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for data, target in train_loader:
        data, target = data.to(run_device), target.to(run_device)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

# Function to evaluate the model
def evaluate(model, test_loader, criterion):
    """Measure the average loss and accuracy of ``model`` on ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Callable ``criterion(outputs, target)`` returning the loss.
            Assumed to return the mean loss of the batch; the per-batch value
            is re-weighted by batch size below.

    Returns:
        Tuple ``(average_loss, accuracy)`` where ``accuracy`` is a percentage
        in ``[0, 100]``. The same two numbers are also printed.

    Raises:
        ValueError: If ``test_loader`` yields no samples, since the averages
            are undefined in that case.
    """
    # Take the device from the model itself instead of relying on a
    # notebook-level ``device`` global; batches then always follow the model.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    total = 0
    correct = 0
    total_loss = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(run_device), target.to(run_device)
            outputs = model(data)
            loss = criterion(outputs, target)
            # Weight the batch loss by its size so a smaller final batch does
            # not skew the dataset-level average.
            total_loss += loss.item() * data.size(0)
            # Predicted class = index of the largest score per sample.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        raise ValueError('test_loader yielded no samples; cannot compute loss or accuracy')

    average_loss = total_loss / total
    accuracy = 100 * correct / total
    print(f'Loss: {average_loss:.4f}, Accuracy: {accuracy:.2f}%')
    return average_loss, accuracy





In [8]:
# Time the pretraining run: num_epochs rounds of one training pass followed by
# one evaluation pass. perf_counter() is a monotonic, high-resolution clock
# meant for measuring intervals; time.time() follows the wall clock, which can
# be adjusted while the loop is running. The result is elapsed seconds.
start_time = time.perf_counter()

# Training and evaluation loop
for epoch in range(num_epochs):
    train(model, train_loader, optimizer, criterion)
    evaluate(model, test_loader, criterion)

# End timing
end_time = time.perf_counter()
total_time_original = end_time - start_time

Loss: 0.8702, Accuracy: 72.23%
Loss: 0.3166, Accuracy: 90.51%
Loss: 0.2490, Accuracy: 92.12%


In [9]:
# Pruning configuration: a single rule that targets every Linear and Conv2d
# layer with a sparse ratio of 0.5, leaving the layer named 'fc3' untouched.
config_list = [
    {'op_types': ['Linear', 'Conv2d'], 'exclude_op_names': ['fc3'], 'sparse_ratio': 0.5},
]

# Build the L1-norm pruner for the model; the masks themselves are generated
# when compress() is called in the next cell.
pruner = L1NormPruner(model, config_list)



In [10]:
# Compress the model and generate the masks.
_, masks = pruner.compress()

# Show the sparsity of each weight mask. A mask holds 1 for every retained
# weight and 0 for every pruned one, so sum / numel is the *kept* fraction
# (density); sparsity is its complement. The two only coincide at 0.5.
for name, mask in masks.items():
    kept_ratio = mask['weight'].sum().item() / mask['weight'].numel()
    print(name, ' sparsity : ', '{:.2}'.format(1 - kept_ratio))

conv2  sparsity :  0.5
fc1  sparsity :  0.5
fc2  sparsity :  0.5
conv1  sparsity :  0.5


In [11]:
# Speed up the pruned model. For more information about speedup, please refer
# to :doc:`pruning_speedup`.
from nni.compression.speedup import ModelSpeedup

# The model has to be unwrapped first if it was wrapped before speedup.
pruner.unwrap_model()

# Example input handed to ModelSpeedup, with dimensions (3, 1, 28, 28):
#   3      - number of samples in the batch (three images processed together)
#   1      - channels per image; 1 for grayscale images such as MNIST
#            (a standard RGB image would have 3)
#   28, 28 - height and width of each MNIST image in pixels
m_speedup = ModelSpeedup(model, torch.rand(3, 1, 28, 28).to(device), masks)
m_speedup.speedup_model()


[2024-05-08 00:53:49] [32mStart to speedup the model...[0m
[2024-05-08 00:53:49] [32mResolve the mask conflict before mask propagate...[0m
[2024-05-08 00:53:49] [32mdim0 sparsity: 0.500000[0m
[2024-05-08 00:53:49] [32mdim1 sparsity: 0.000000[0m
0 Filter
[2024-05-08 00:53:49] [32mdim0 sparsity: 0.500000[0m
[2024-05-08 00:53:49] [32mdim1 sparsity: 0.000000[0m
[2024-05-08 00:53:49] [32mInfer module masks...[0m
[2024-05-08 00:53:49] [32mPropagate original variables[0m
[2024-05-08 00:53:49] [32mPropagate variables for placeholder: x, output mask:  0.0000 [0m
[2024-05-08 00:53:49] [32mPropagate variables for call_module: conv1, weight:  0.5000 bias:  0.5000 , output mask:  0.0000 [0m
[2024-05-08 00:53:49] [32mPropagate variables for call_module: relu1, , output mask:  0.0000 [0m
[2024-05-08 00:53:49] [32mPropagate variables for call_module: pool1, , output mask:  0.0000 [0m
[2024-05-08 00:53:49] [32mPropagate variables for call_module: conv2, weight:  0.5000 bias:  0

TorchModel(
  (conv1): Conv2d(1, 3, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=128, out_features=60, bias=True)
  (fc2): Linear(in_features=60, out_features=42, bias=True)
  (fc3): Linear(in_features=42, out_features=10, bias=True)
  (relu1): ReLU()
  (relu2): ReLU()
  (relu3): ReLU()
  (relu4): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [12]:
print(model)
# This is the pruned model. After pruning:
# the number of layers and the layer types are unchanged,
# but because some weights were pruned away, the layers have fewer output weights; as a result TorchModel() has changed, so TorchModel() has to be redefined before the final evaluation is measured.

TorchModel(
  (conv1): Conv2d(1, 3, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=128, out_features=60, bias=True)
  (fc2): Linear(in_features=60, out_features=42, bias=True)
  (fc3): Linear(in_features=42, out_features=10, bias=True)
  (relu1): ReLU()
  (relu2): ReLU()
  (relu3): ReLU()
  (relu4): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [13]:
# Visualize the autograd graph of the pruned model (make_dot was imported in
# the earlier visualization cell).
model.eval()  # Set the model to evaluation mode

# The dummy batch must live on the same device as the model; a CPU tensor fed
# to a CUDA model raises a device-mismatch error.
dummy_input = torch.randn(2, 1, 28, 28).to(device)

# Run a forward pass with the dummy input to get the output tensor
output = model(dummy_input)

# Generate the graph using torchviz, labelling nodes with parameter names
dot = make_dot(output, params=dict(model.named_parameters()))

# Save the visualization as a PNG file. The call is left as the last
# expression so the notebook displays the written file name.
dot.render('model_pruned_visualization', format='png')


'model_pruned_visualization.png'

## Fine-tuning Compacted Model
Note that once the model has been sped up, you must create a new optimizer for fine-tuning,
because speedup replaces the large masked layers with smaller dense ones.
>After pruning and speedup, the model is typically smaller and faster, but it may also lose some accuracy or require adjustment to fully adapt to its new structure, which is where fine-tuning comes in.



In [14]:
# Start timing the fine-tuning run.
# NOTE(review): the matching end_time is taken in a later cell, after the
# fine-tuning cell, so the measured interval also includes any pause between
# executing these cells.
start_time = time.time()

In [15]:
# Fine-tuning loop for the pruned model.
# A fresh optimizer is built from the model's current parameters, with the
# same learning_rate constant as the pretraining run (0.01) instead of a
# repeated literal, so the two timed runs cannot drift apart.
optimizer = SGD(model.parameters(), lr=learning_rate)
# The model outputs log-probabilities (log_softmax), so the loss is NLL.
criterion = F.nll_loss
for epoch in range(num_epochs):
    train(model, train_loader, optimizer, criterion)
    evaluate(model, test_loader, criterion)



# from nni_assets.compression.mnist_model import TorchModel, trainer, evaluator, device
# optimizer = SGD(model.parameters(), 1e-2) # 0.01 is the learning rate
# criterion = F.nll_loss
# for epoch in range(3):
#     trainer(model, optimizer, criterion)
#     evaluator(model)


Loss: 0.1888, Accuracy: 94.15%
Loss: 0.1547, Accuracy: 95.37%
Loss: 0.1189, Accuracy: 96.24%


In [16]:
# End timing of the fine-tuning run.
# NOTE(review): start_time was set in an earlier cell, before the fine-tuning
# cell, so this duration also covers any idle time between running the cells.
end_time = time.time()
total_time_compressed = end_time - start_time


In [17]:
# Save the model's parameters after compression and fine-tuning; the file is
# used below for the on-disk size comparison.
torch.save(obj=model.state_dict(), f='compressed_model.pth')

##  Validation: Model compression


1. the model size
2. execution time


1. the model size

In [18]:
import os

# On-disk size, in bytes, of the state dicts saved before and after compression.
size_original = os.stat('original_model.pth').st_size
size_compressed = os.stat('compressed_model.pth').st_size

# Report both sizes and the number of bytes saved.
print(f'Original Model Size: {size_original} bytes')
print(f'Compressed Model Size: {size_compressed} bytes')
print(f'Reduction in Size: {size_original - size_compressed} bytes')


Original Model Size: 181466 bytes
Compressed Model Size: 49334 bytes
Reduction in Size: 132132 bytes


2. the execution time

In [19]:
# Compare the two timed runs; both durations are in seconds.
training_time_reduction = total_time_original - total_time_compressed

print(f'Original Model Execution Time: {total_time_original} (s)')
print(f'Compressed Model Execution Time: {total_time_compressed} (s)')
print(f'Reduction in Execution Time: {training_time_reduction} (s)')

Original Model Execution Time: 37.83460831642151 (s)
Compressed Model Execution Time: 34.30511403083801 (s)
Reduction in Execution Time: 3.529494285583496 (s)
