In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.optim import ScheduledOptimizer
from sparseml.pytorch.utils import tensor_sparsity, get_prunable_layers
import matplotlib.pyplot as plt


# /home/webexpert/Desktop/seon project/Sparsity_different_datasets.ipynb home/webexpert/Desktop/seon project/Sparsity_different_datasets.ipynb sumithsunil@dev10.intensivate.com:/nas/home/sumithsunil/code/

# docker cp /home/webexpert/Desktop/seon project/Sparsity_different_datasets.ipynb to sumithsunil@dev10.intensivate.com:/nas/home/sumithsunil/code/

### __MNIST data__
- __on 4864 weights__

In [23]:
# Step 1: Load MNIST Dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=32, shuffle=False)

# Check the number of images in the training and test sets
print(f'Number of training images: {len(trainset)}')
print(f'Number of test images: {len(testset)}')

Number of training images: 60000
Number of test images: 10000


In [29]:
# Step 2: Define the Model Architecture (At least 2 Hidden Layers, 1000 to 5000 weights)
class SimpleSparseNet(nn.Module):
    def __init__(self):
        super(SimpleSparseNet, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 6)  # Input layer to Hidden Layer 1 (784 * 40 = 4704 weights)
        self.bn1 = nn.BatchNorm1d(6)      # Biases: 6 Total: (Scale (gamma) parameters: 6 + Shift (beta) parameters: 6 = 12)
        self.fc2 = nn.Linear(6, 10)       # Hidden Layer 1 to Hidden Layer 2 (6 * 10 = 60 weights)
        self.bn2 = nn.BatchNorm1d(10)      # Biases: 10 Total: (Scale (gamma) parameters: 10 + Shift (beta) parameters: 10 = 20)
        self.fc3 = nn.Linear(10, 10)       # Hidden Layer 2 to Output Layer (10 * 10 = 100 weights)
        # Total weights and biases: (4710 + 12 + 70 + 20 + 110 = 4922), which falls within the specified range

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x
    

# Step 3: Train the Base Model
model = SimpleSparseNet()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = inputs.float()  # Ensure inputs are of type float
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:    # Record loss every 100 mini-batches
            train_losses.append(running_loss / 100)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        images = images.float()  # Ensure images are of type float
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 91.06%


In [31]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleSparseNet()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            inputs = inputs.float()  # Ensure inputs are of type float
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:    # Record loss every 100 mini-batches
                sparse_train_losses.append(running_loss / 100)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0


        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                images = images.float()  # Ensure images are of type float
                outputs = sparse_model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% -  Remaining Weights: {remaining_weights}/{total_weights}')


    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 91.43% -  Remaining Weights: 4864/4864
Epoch 2 - Sparsity: 20.65% - Accuracy: 91.66% -  Remaining Weights: 2860/4864
Epoch 3 - Sparsity: 28.28% - Accuracy: 91.80% -  Remaining Weights: 2076/4864
Epoch 4 - Sparsity: 31.72% - Accuracy: 91.79% -  Remaining Weights: 1946/4864
Epoch 5 - Sparsity: 31.72% - Accuracy: 91.92% -  Remaining Weights: 1946/4864

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 91.60% -  Remaining Weights: 4864/4864
Epoch 2 - Sparsity: 22.59% - Accuracy: 91.59% -  Remaining Weights: 2540/4864
Epoch 3 - Sparsity: 33.76% - Accuracy: 91.66% -  Remaining Weights: 1613/4864
Epoch 4 - Sparsity: 36.05% - Accuracy: 91.91% -  Remaining Weights: 1459/4864
Epoch 5 - Sparsity: 36.05% - Accuracy: 91.74% -  Remaining Weights: 1459/4864

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 91.47% -  Remaining Weigh

### __MNIST data__
- __on 1588 weights__

In [32]:
# Step 2: Define the Model Architecture (At least 2 Hidden Layers, 1000 to 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 2)  # 784 -> 2 neurons
        self.fc2 = nn.Linear(2, 10)     # 2 -> 10 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = inputs.float()  # Ensure inputs are of type float
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:    # Record loss every 100 mini-batches
            train_losses.append(running_loss / 100)
            running_loss = 0.0


# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        images = images.float()  # Ensure images are of type float
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 55.05%


In [33]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            inputs = inputs.float()  # Ensure inputs are of type float
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:    # Record loss every 100 mini-batches
                sparse_train_losses.append(running_loss / 100)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                images = images.float()  # Ensure images are of type float
                outputs = sparse_model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 56.05% - Remaining Weights: 1588/1588
Epoch 2 - Sparsity: 20.85% - Accuracy: 58.58% - Remaining Weights: 934/1588
Epoch 3 - Sparsity: 29.02% - Accuracy: 58.70% - Remaining Weights: 678/1588
Epoch 4 - Sparsity: 30.39% - Accuracy: 58.71% - Remaining Weights: 635/1588
Epoch 5 - Sparsity: 30.39% - Accuracy: 58.65% - Remaining Weights: 635/1588

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 55.89% - Remaining Weights: 1588/1588
Epoch 2 - Sparsity: 24.20% - Accuracy: 58.18% - Remaining Weights: 829/1588
Epoch 3 - Sparsity: 38.77% - Accuracy: 57.94% - Remaining Weights: 527/1588
Epoch 4 - Sparsity: 40.40% - Accuracy: 58.26% - Remaining Weights: 476/1588
Epoch 5 - Sparsity: 40.40% - Accuracy: 58.74% - Remaining Weights: 476/1588

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 55.35% - Remaining Weights: 1588/1588
Epoch

### __IRIS data__
- __on 1011 weights__

In [3]:
# Step 1: Load Iris Dataset
iris = load_iris()
X = iris.data
y = iris.target
print(f'The Iris data have {X.shape} length')

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

The Iris data have (150, 4) length


In [36]:
# Step 2: Define the Model Architecture (1011 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(4, 64)  # 4 -> 64 neurons
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 8)   # 64 -> 8 neurons
        self.bn2 = nn.BatchNorm1d(8)
        self.fc3 = nn.Linear(8, 3)    # 8 -> 3 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 93.33%


In [37]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 96.67% - Remaining Weights: 792/792
Epoch 2 - Sparsity: 28.21% - Accuracy: 96.67% - Remaining Weights: 472/792
Epoch 3 - Sparsity: 43.08% - Accuracy: 100.00% - Remaining Weights: 340/792
Epoch 4 - Sparsity: 44.57% - Accuracy: 93.33% - Remaining Weights: 317/792
Epoch 5 - Sparsity: 44.57% - Accuracy: 100.00% - Remaining Weights: 317/792

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 93.33% - Remaining Weights: 792/792
Epoch 2 - Sparsity: 33.18% - Accuracy: 96.67% - Remaining Weights: 421/792
Epoch 3 - Sparsity: 48.55% - Accuracy: 93.33% - Remaining Weights: 265/792
Epoch 4 - Sparsity: 51.82% - Accuracy: 100.00% - Remaining Weights: 238/792
Epoch 5 - Sparsity: 51.82% - Accuracy: 100.00% - Remaining Weights: 238/792

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 96.67% - Remaining Weights: 792/792
Epoch 2 - Spars

### __IRIS data__
- __on 4544 weights__

In [38]:
# Step 2: Define the Model Architecture (Between 4000 and 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(4, 64)  # 4 -> 64 neurons
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 64)  # 64 -> 64 neurons
        self.bn2 = nn.BatchNorm1d(64) 
        self.fc3 = nn.Linear(64, 3)   # 64 -> 3 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x


# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 96.67%


In [39]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 100.00% - Remaining Weights: 4544/4544
Epoch 2 - Sparsity: 28.55% - Accuracy: 100.00% - Remaining Weights: 2706/4544
Epoch 3 - Sparsity: 40.18% - Accuracy: 100.00% - Remaining Weights: 1949/4544
Epoch 4 - Sparsity: 42.03% - Accuracy: 100.00% - Remaining Weights: 1818/4544
Epoch 5 - Sparsity: 42.03% - Accuracy: 100.00% - Remaining Weights: 1818/4544

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 96.67% - Remaining Weights: 4544/4544
Epoch 2 - Sparsity: 33.20% - Accuracy: 100.00% - Remaining Weights: 2413/4544
Epoch 3 - Sparsity: 46.36% - Accuracy: 100.00% - Remaining Weights: 1519/4544
Epoch 4 - Sparsity: 47.96% - Accuracy: 100.00% - Remaining Weights: 1363/4544
Epoch 5 - Sparsity: 47.96% - Accuracy: 100.00% - Remaining Weights: 1363/4544

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 96.67% - Remaining Weights

### __20 Newsgroups Dataset__
- __on 844 weights__

In [40]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer

# Step 1: Load 20 Newsgroups Dataset
newsgroups = fetch_20newsgroups(subset='all', categories=['rec.autos', 'sci.space', 'talk.politics.misc'])
X = newsgroups.data
y = newsgroups.target
print(f'The 20 Newsgroups dataset contains {len(X)} documents and {len(set(y))} categories.')

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(max_features=100)
X = vectorizer.fit_transform(X).toarray()

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

The 20 Newsgroups dataset contains 2752 documents and 3 categories.


In [42]:
# Step 2: Define the Model Architecture (under 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(100, 8)  # 100 -> 16 neurons
        self.bn1 = nn.BatchNorm1d(8)
        self.fc2 = nn.Linear(8, 4)   # 16 -> 8 neurons
        self.bn2 = nn.BatchNorm1d(4)
        self.fc3 = nn.Linear(4, 3)    # 8 -> 3 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 74.77%


In [43]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 74.77% - Remaining Weights: 844/844
Epoch 2 - Sparsity: 22.58% - Accuracy: 73.87% - Remaining Weights: 494/844
Epoch 3 - Sparsity: 30.21% - Accuracy: 74.59% - Remaining Weights: 359/844
Epoch 4 - Sparsity: 31.08% - Accuracy: 73.32% - Remaining Weights: 338/844
Epoch 5 - Sparsity: 31.08% - Accuracy: 74.23% - Remaining Weights: 338/844

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 74.77% - Remaining Weights: 844/844
Epoch 2 - Sparsity: 24.92% - Accuracy: 74.95% - Remaining Weights: 438/844
Epoch 3 - Sparsity: 34.58% - Accuracy: 75.86% - Remaining Weights: 278/844
Epoch 4 - Sparsity: 35.62% - Accuracy: 75.86% - Remaining Weights: 253/844
Epoch 5 - Sparsity: 35.62% - Accuracy: 76.95% - Remaining Weights: 253/844

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 74.59% - Remaining Weights: 844/844
Epoch 2 - Sparsity:

### __20 Newsgroups Dataset__
- __on 4952 weights__

In [44]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer

# Step 1: Load 20 Newsgroups Dataset
newsgroups = fetch_20newsgroups(subset='all', categories=['rec.autos', 'sci.space', 'talk.politics.misc'])
X = newsgroups.data
y = newsgroups.target

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(max_features=300)
X = vectorizer.fit_transform(X).toarray()

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

In [45]:
# Step 2: Define the Model Architecture (under 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(300, 16)  # 100 -> 16 neurons
        self.bn1 = nn.BatchNorm1d(16)
        self.fc2 = nn.Linear(16, 8)   # 16 -> 8 neurons
        self.bn2 = nn.BatchNorm1d(8)
        self.fc3 = nn.Linear(8, 3)    # 8 -> 3 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 88.38%


In [46]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 86.57% - Remaining Weights: 4952/4952
Epoch 2 - Sparsity: 18.32% - Accuracy: 88.38% - Remaining Weights: 2898/4952
Epoch 3 - Sparsity: 26.11% - Accuracy: 86.75% - Remaining Weights: 2105/4952
Epoch 4 - Sparsity: 26.97% - Accuracy: 87.84% - Remaining Weights: 1981/4952
Epoch 5 - Sparsity: 26.97% - Accuracy: 86.39% - Remaining Weights: 1981/4952

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 87.11% - Remaining Weights: 4952/4952
Epoch 2 - Sparsity: 22.38% - Accuracy: 88.75% - Remaining Weights: 2569/4952
Epoch 3 - Sparsity: 29.64% - Accuracy: 87.84% - Remaining Weights: 1633/4952
Epoch 4 - Sparsity: 31.67% - Accuracy: 88.38% - Remaining Weights: 1486/4952
Epoch 5 - Sparsity: 31.67% - Accuracy: 87.66% - Remaining Weights: 1486/4952

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 88.93% - Remaining Weights: 4952/49

### __IMDB ratings__
- __836 weights__

In [47]:
import pandas as pd

# Step 1: Load IMDb Dataset from Local CSV Files
train_df = pd.read_csv('data/train_data(imdb).csv')
test_df = pd.read_csv('data/test_data(imdb).csv')
print(f'The IMDB dataset contains {len(train_df)} documents and {len(set(train_df))} categories.')

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(max_features=100)
X_train = vectorizer.fit_transform(train_df['text']).toarray()
y_train = train_df['label'].values
X_test = vectorizer.transform(test_df['text']).toarray()
y_test = test_df['label'].values

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

The IMDB dataset contains 1000 documents and 2 categories.


In [48]:
# Step 2: Define the Model Architecture (under 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(100, 8)  # 100 -> 16 neurons
        self.bn1 = nn.BatchNorm1d(8)
        self.fc2 = nn.Linear(8, 4)   # 16 -> 8 neurons
        self.bn2 = nn.BatchNorm1d(4)
        self.fc3 = nn.Linear(4, 1)    # 8 -> 1 neuron for binary classification
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
        loss = F.binary_cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted.squeeze() == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 68.40%


In [50]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
            loss = F.binary_cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted.squeeze() == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 69.80% - Remaining Weights: 836/836
Epoch 2 - Sparsity: 18.21% - Accuracy: 69.70% - Remaining Weights: 495/836
Epoch 3 - Sparsity: 26.92% - Accuracy: 69.50% - Remaining Weights: 358/836
Epoch 4 - Sparsity: 27.92% - Accuracy: 70.50% - Remaining Weights: 334/836
Epoch 5 - Sparsity: 27.92% - Accuracy: 69.10% - Remaining Weights: 334/836

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 68.80% - Remaining Weights: 836/836
Epoch 2 - Sparsity: 21.50% - Accuracy: 68.30% - Remaining Weights: 440/836
Epoch 3 - Sparsity: 33.25% - Accuracy: 67.40% - Remaining Weights: 278/836
Epoch 4 - Sparsity: 34.38% - Accuracy: 68.90% - Remaining Weights: 251/836
Epoch 5 - Sparsity: 34.38% - Accuracy: 68.70% - Remaining Weights: 251/836

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 69.40% - Remaining Weights: 836/836
Epoch 2 - Sparsity:

### __IMDB ratings__
- __3336 weights__

In [51]:
import pandas as pd

# Step 1: Load IMDb Dataset from Local CSV Files
train_df = pd.read_csv('data/train_data(imdb).csv')
test_df = pd.read_csv('data/test_data(imdb).csv')

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(max_features=200)
X_train = vectorizer.fit_transform(train_df['text']).toarray()
y_train = train_df['label'].values
X_test = vectorizer.transform(test_df['text']).toarray()
y_test = test_df['label'].values

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

In [52]:
# Step 2: Define the Model Architecture (under 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(200, 16)  # 200 -> 64 neurons
        self.bn1 = nn.BatchNorm1d(16)
        self.fc2 = nn.Linear(16, 8)   # 64 -> 32 neurons
        self.bn2 = nn.BatchNorm1d(8)
        self.fc3 = nn.Linear(8, 1)    # 32 -> 1 neuron for binary classification
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
        loss = F.binary_cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted.squeeze() == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 68.30%


In [53]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
            loss = F.binary_cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted.squeeze() == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 68.30% - Remaining Weights: 3336/3336
Epoch 2 - Sparsity: 18.69% - Accuracy: 69.30% - Remaining Weights: 1974/3336
Epoch 3 - Sparsity: 28.87% - Accuracy: 71.50% - Remaining Weights: 1428/3336
Epoch 4 - Sparsity: 30.10% - Accuracy: 69.20% - Remaining Weights: 1334/3336
Epoch 5 - Sparsity: 30.10% - Accuracy: 69.70% - Remaining Weights: 1334/3336

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 66.20% - Remaining Weights: 3336/3336
Epoch 2 - Sparsity: 26.61% - Accuracy: 68.70% - Remaining Weights: 1756/3336
Epoch 3 - Sparsity: 37.08% - Accuracy: 65.80% - Remaining Weights: 1111/3336
Epoch 4 - Sparsity: 38.98% - Accuracy: 69.80% - Remaining Weights: 1001/3336
Epoch 5 - Sparsity: 38.98% - Accuracy: 69.60% - Remaining Weights: 1001/3336

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 71.30% - Remaining Weights: 3336/33

### __Titanic data__
- __752 weights__

In [54]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml

In [55]:
# Step 1: Load Titanic Dataset from sklearn
# Fetch the Titanic dataset using sklearn's fetch_openml
print("Loading Titanic dataset from sklearn...")
titanic = fetch_openml('titanic', version=1, as_frame=True)
titanic_df = titanic.frame

# Preprocessing Titanic Data
titanic_df = titanic_df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked', 'survived']]
titanic_df['sex'] = titanic_df['sex'].map({'male': 0, 'female': 1})
titanic_df['embarked'] = titanic_df['embarked'].map({'C': 0, 'Q': 1, 'S': 2})
titanic_df['age'] = titanic_df['age'].fillna(titanic_df['age'].median())
titanic_df['fare'] = titanic_df['fare'].fillna(titanic_df['fare'].median())
titanic_df['embarked'] = titanic_df['embarked'].fillna(titanic_df['embarked'].mode()[0])

X = titanic_df.drop('survived', axis=1).values
y = titanic_df['survived'].values

Loading Titanic dataset from sklearn...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic_df['sex'] = titanic_df['sex'].map({'male': 0, 'female': 1})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic_df['embarked'] = titanic_df['embarked'].map({'C': 0, 'Q': 1, 'S': 2})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic_df['age'] = titanic_df['age'].fillna(titanic_df['ag

In [56]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(pd.to_numeric(y_train, errors='coerce').astype(int), dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(pd.to_numeric(y_test, errors='coerce').astype(int), dtype=torch.long)

# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

In [57]:
# Step 2: Define the Model Architecture (between 4000 and 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(7, 32)  # 7 -> 128 neurons
        self.bn1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 16)  # 128 -> 64 neurons
        self.bn2 = nn.BatchNorm1d(16)
        self.fc3 = nn.Linear(16, 1)    # 64 -> 1 neuron for binary classification
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
        loss = F.binary_cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted.squeeze() == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 75.95%


In [58]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
            loss = F.binary_cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted.squeeze() == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 75.57% - Remaining Weights: 752/752
Epoch 2 - Sparsity: 29.06% - Accuracy: 75.57% - Remaining Weights: 432/752
Epoch 3 - Sparsity: 40.06% - Accuracy: 75.19% - Remaining Weights: 321/752
Epoch 4 - Sparsity: 41.78% - Accuracy: 78.24% - Remaining Weights: 301/752
Epoch 5 - Sparsity: 41.78% - Accuracy: 77.86% - Remaining Weights: 301/752

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 75.19% - Remaining Weights: 752/752
Epoch 2 - Sparsity: 37.17% - Accuracy: 76.72% - Remaining Weights: 381/752
Epoch 3 - Sparsity: 50.04% - Accuracy: 76.72% - Remaining Weights: 249/752
Epoch 4 - Sparsity: 54.22% - Accuracy: 77.10% - Remaining Weights: 226/752
Epoch 5 - Sparsity: 54.22% - Accuracy: 76.72% - Remaining Weights: 226/752

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 75.19% - Remaining Weights: 752/752
Epoch 2 - Sparsity:

### __Titanic data__
- __2528 weights__

In [59]:
# Step 2: Define the Model Architecture (between 4000 and 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(7, 64)  # 7 -> 128 neurons
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 32)  # 128 -> 64 neurons
        self.bn2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 1)    # 64 -> 1 neuron for binary classification
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
        loss = F.binary_cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted.squeeze() == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cuda is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 77.10%


In [61]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 4.0
        update_frequency: 0.1
        init_sparsity: 0.05
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
            loss = F.binary_cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (sparsity / len(prunable_layers)) * 100 if len(prunable_layers) > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted.squeeze() == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 76.34% - Remaining Weights: 2528/2528
Epoch 2 - Sparsity: 25.92% - Accuracy: 77.86% - Remaining Weights: 1452/2528
Epoch 3 - Sparsity: 38.29% - Accuracy: 76.72% - Remaining Weights: 1078/2528
Epoch 4 - Sparsity: 39.73% - Accuracy: 75.95% - Remaining Weights: 1011/2528
Epoch 5 - Sparsity: 39.73% - Accuracy: 75.95% - Remaining Weights: 1011/2528

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 75.95% - Remaining Weights: 2528/2528
Epoch 2 - Sparsity: 31.19% - Accuracy: 75.95% - Remaining Weights: 1279/2528
Epoch 3 - Sparsity: 44.29% - Accuracy: 75.95% - Remaining Weights: 837/2528
Epoch 4 - Sparsity: 46.83% - Accuracy: 76.34% - Remaining Weights: 758/2528
Epoch 5 - Sparsity: 46.83% - Accuracy: 76.72% - Remaining Weights: 758/2528

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 0.00% - Accuracy: 76.34% - Remaining Weights: 2528/2528
