In [2]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.optim import ScheduledOptimizer
from sparseml.pytorch.utils import tensor_sparsity, get_prunable_layers
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
# !pip install -q nltk
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from collections import Counter
# Download necessary NLTK data
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /home/webexpert/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/webexpert/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/webexpert/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/webexpert/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
# Step 1: Load 20 Newsgroups Dataset
newsgroups = fetch_20newsgroups(subset='all', categories=['rec.autos', 'sci.space', 'talk.politics.misc'])
X = newsgroups.data
y = newsgroups.target
print(f'The 20 Newsgroups dataset contains {len(X)} documents and {len(set(y))} categories.')

The 20 Newsgroups dataset contains 2752 documents and 3 categories.


In [6]:
y[1]

2

In [7]:
X[1].splitlines()

['From: kbanaian@bernard.pitzer.claremont.edu (King Banaian)',
 'Subject: Re: National Sales Tax, The Movie',
 'Lines: 43',
 'Organization: Pitzer College',
 '',
 'In article <VEAL.731.734985604@utkvm1.utk.edu> VEAL@utkvm1.utk.edu (David Veal) writes:',
 '>In article <1993Apr16.164750.21913@alchemy.chem.utoronto.ca> golchowy@',
 'alchemy.chem.utoronto.ca (Gerald Olchowy) writes:>',
 '>>In article <9304151442.AA05233@inet-gw-2.pa.dec.com> blh@uiboise.idbsu.edu (Broward L. Horne) writes:',
 '>>',
 ">>Why don't the Republicans get their act together, and say they",
 '>>will support a broad-based VAT that would have to be visible',
 '>>(the VAT in Canada is visible unlike the invisible VATS they',
 '>>have in Europe)',
 '>>and suggest a rate sufficient to halve income and corporate',
 '>>and capital gains tax rates and at a rate sufficient to give',
 '>>the Clintons enough revenue for their health care reform, ',
 '>',
 '>       The Republicans are, in general, fighting any tax increase.',

In [8]:
def clean_text(text, min_word_frequency=2):
    # Initialize stemmer, lemmatizer, and stopwords
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatization
    tokens = [word for word in tokens if not word.isdigit()]
    return " ".join(tokens)

### __Experiment 1: 150, 8, 8:__

In [9]:
cleaned_text = [clean_text(i) for i in X]
vectorizer = TfidfVectorizer(max_features=150)
cleaned_text = vectorizer.fit_transform(cleaned_text).toarray() 
scaler = StandardScaler()
cleaned_text = scaler.fit_transform(cleaned_text)
X_train, X_test, y_train, y_test = train_test_split(cleaned_text, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)
# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

(2201, 150) (551, 150) (2201,) (551,)


In [11]:
# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is being used for computation')

# Step 2: Define the Model Architecture
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(150, 8)
        self.bn1 = nn.BatchNorm1d(8)
        self.fc2 = nn.Linear(8, 8)
        self.bn2 = nn.BatchNorm1d(8)
        self.fc3 = nn.Linear(8, 3)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to(device)

cpu is being used for computation


In [12]:
# Step 3: Train the Base Model
# Optimizer (use Adam instead of the non-existent 'ADA' optimizer)
base_optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
num_epochs = 5

# Step 4: Training Loop
train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU
        # print(inputs.shape, labels.shape)
        base_optimizer.zero_grad()  # Zero gradients before backprop

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()  # Backpropagate the loss

        # Update the weights
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

    # Step 5: Evaluate the Base Model
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # No gradient tracking during evaluation
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1} - Accuracy: {accuracy:.2f}%')


Training Base Model:
Epoch 1 - Accuracy: 66.06%
Epoch 2 - Accuracy: 80.04%
Epoch 3 - Accuracy: 84.03%
Epoch 4 - Accuracy: 84.94%
Epoch 5 - Accuracy: 85.84%


In [13]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    # optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    optimizer = optim.Adam(sparse_model.parameters(), lr=0.001, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 5.0
        update_frequency: 0.3
        init_sparsity: 0.2
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            # labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (remaining_weights / total_weights) * 100 if remaining_weights > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                # predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')
        # print(f'Epoch {epoch + 1} - Accuracy: {accuracy:.2f}%')
        # print(f'Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)

2024-12-06 14:42:21 sparseml.pytorch.utils.logger INFO     Logging all SparseML modifier-level logs to sparse_logs/06-12-2024_14.42.21.log



Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 85.48% - Remaining Weights: 1288/1288
Epoch 2 - Sparsity: 58.39% - Accuracy: 86.57% - Remaining Weights: 752/1288
Epoch 3 - Sparsity: 46.43% - Accuracy: 85.30% - Remaining Weights: 598/1288
Epoch 4 - Sparsity: 41.23% - Accuracy: 84.94% - Remaining Weights: 531/1288
Epoch 5 - Sparsity: 39.98% - Accuracy: 85.66% - Remaining Weights: 515/1288

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 86.03% - Remaining Weights: 1288/1288
Epoch 2 - Sparsity: 52.95% - Accuracy: 85.66% - Remaining Weights: 682/1288
Epoch 3 - Sparsity: 38.04% - Accuracy: 85.66% - Remaining Weights: 490/1288
Epoch 4 - Sparsity: 31.60% - Accuracy: 86.03% - Remaining Weights: 407/1288
Epoch 5 - Sparsity: 29.97% - Accuracy: 86.93% - Remaining Weights: 386/1288

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 86.57% - Remaining Weights: 1288/1288

### __Experiment 2: 300, 8, 4:__

In [14]:
cleaned_text = [clean_text(i) for i in X]
vectorizer = TfidfVectorizer(max_features=300)
cleaned_text = vectorizer.fit_transform(cleaned_text).toarray()
scaler = StandardScaler()
cleaned_text = scaler.fit_transform(cleaned_text)
X_train, X_test, y_train, y_test = train_test_split(cleaned_text, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)
# Create DataLoader for training and testing
trainloader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
testloader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

(2201, 300) (551, 300) (2201,) (551,)


In [15]:
# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is being used for computation')

# Step 2: Define the Model Architecture
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(300, 8)  # 300 -> 8 neurons
        self.bn1 = nn.BatchNorm1d(8)
        self.fc2 = nn.Linear(8, 4)    # 8 -> 4 neurons
        self.bn2 = nn.BatchNorm1d(4)
        self.fc3 = nn.Linear(4, 3)    # 4 -> 3 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to(device)

cpu is being used for computation


In [16]:
# Optimizer (use Adam instead of the non-existent 'ADA' optimizer)
base_optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
num_epochs = 5

# Step 4: Training Loop
train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU
        # print(inputs.shape, labels.shape)
        base_optimizer.zero_grad()  # Zero gradients before backprop

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()  # Backpropagate the loss

        # Update the weights
        base_optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # Record loss every 10 mini-batches
            train_losses.append(running_loss / 10)
            running_loss = 0.0

    # Step 5: Evaluate the Base Model
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # No gradient tracking during evaluation
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1} - Accuracy: {accuracy:.2f}%')


Training Base Model:
Epoch 1 - Accuracy: 60.98%
Epoch 2 - Accuracy: 84.39%
Epoch 3 - Accuracy: 86.93%
Epoch 4 - Accuracy: 91.29%
Epoch 5 - Accuracy: 90.02%


In [17]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    # optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    optimizer = optim.Adam(sparse_model.parameters(), lr=0.001, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 5.0
        update_frequency: 0.3
        init_sparsity: 0.2
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            # labels = labels.float().unsqueeze(1)  # Adjust labels shape for binary classification
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 10 == 9:    # Record loss every 10 mini-batches
                sparse_train_losses.append(running_loss / 10)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (remaining_weights / total_weights) * 100 if remaining_weights > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = sparse_model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                # predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')
        # print(f'Epoch {epoch + 1} - Accuracy: {accuracy:.2f}%')
        # print(f'Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 90.20% - Remaining Weights: 2444/2444
Epoch 2 - Sparsity: 58.39% - Accuracy: 89.47% - Remaining Weights: 1427/2444
Epoch 3 - Sparsity: 46.44% - Accuracy: 90.56% - Remaining Weights: 1135/2444
Epoch 4 - Sparsity: 41.24% - Accuracy: 89.29% - Remaining Weights: 1008/2444
Epoch 5 - Sparsity: 40.02% - Accuracy: 90.38% - Remaining Weights: 978/2444

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 91.29% - Remaining Weights: 2444/2444
Epoch 2 - Sparsity: 52.99% - Accuracy: 92.20% - Remaining Weights: 1295/2444
Epoch 3 - Sparsity: 38.01% - Accuracy: 91.83% - Remaining Weights: 929/2444
Epoch 4 - Sparsity: 31.55% - Accuracy: 91.29% - Remaining Weights: 771/2444
Epoch 5 - Sparsity: 29.99% - Accuracy: 91.65% - Remaining Weights: 733/2444

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 90.38% - Remaining Weights: 2444/

In [18]:
# Step 1: Load MNIST Dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=32, shuffle=False)

# Check the number of images in the training and test sets
print(f'Number of training images: {len(trainset)}')
print(f'Number of test images: {len(testset)}')

Number of training images: 60000
Number of test images: 10000


In [19]:
# Step 2: Define the Model Architecture (At least 2 Hidden Layers, 1000 to 5000 weights)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 2)  # 784 -> 2 neurons
        self.fc2 = nn.Linear(2, 10)     # 2 -> 10 neurons for classification
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

# Step 3: Train the Base Model
model = SimpleNN()
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
num_epochs = 5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'{device} is using for computation')

train_losses = []
print("\nTraining Base Model:")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = inputs.float()  # Ensure inputs are of type float
        base_optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        loss.backward()

        # Optimize
        base_optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:    # Record loss every 100 mini-batches
            train_losses.append(running_loss / 100)
            running_loss = 0.0


# Step 4: Evaluate the Base Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        images = images.float()  # Ensure images are of type float
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
base_accuracy = 100 * correct / total
print(f'Base Model Accuracy on Test Data: {base_accuracy:.2f}%')

cpu is using for computation

Training Base Model:
Base Model Accuracy on Test Data: 56.18%


In [20]:
# Step 5: Test Various Sparsity Levels
sparsity_targets = [0.60, 0.70, 0.75, 0.80]
for sparsity_target in sparsity_targets:
    print(f"\nTesting Sparsity Level: {sparsity_target * 100:.0f}%")

    # Reinitialize the Model for Sparsification
    sparse_model = SimpleNN()
    sparse_model.load_state_dict(model.state_dict())  # Copy weights from the trained base model
    sparse_model = sparse_model.to(device)

    # Step 6: Apply Sparsification and Train the Sparse Model
    optimizer = torch.optim.SGD(sparse_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    steps_per_epoch = len(trainloader)

    # Create a new sparsification recipe with the target sparsity level
    recipe_content = f'''
    modifiers:
      - !EpochRangeModifier
        start_epoch: 0.0
        end_epoch: 5.0

      - !GlobalMagnitudePruningModifier
        params: __ALL_PRUNABLE__
        start_epoch: 1.0
        end_epoch: 5.0
        update_frequency: 0.3
        init_sparsity: 0.2
        final_sparsity: {sparsity_target}
        mask_type: unstructured
    '''

    with open('temp_recipe.yaml', 'w') as f:
        f.write(recipe_content)

    manager = ScheduledModifierManager.from_yaml('temp_recipe.yaml')
    optimizer = ScheduledOptimizer(optimizer, sparse_model, manager, steps_per_epoch=steps_per_epoch)

    sparsity_levels = []
    accuracies = []
    sparse_train_losses = []

    print("\nTraining Sparse Model:")
    for epoch in range(num_epochs):
        sparse_model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            inputs = inputs.float()  # Ensure inputs are of type float
            optimizer.zero_grad()

            # Forward pass
            outputs = sparse_model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()

            # Optimize
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:    # Record loss every 100 mini-batches
                sparse_train_losses.append(running_loss / 100)
                running_loss = 0.0

        # Step 7: Evaluate Sparsity and Accuracy after Each Epoch
        sparse_model.eval()
        prunable_layers = get_prunable_layers(sparse_model)
        sparsity = 0.0
        total_weights = 0
        remaining_weights = 0
        for (name, layer) in prunable_layers:
            layer_sparsity = tensor_sparsity(layer.weight).item()
            sparsity += layer_sparsity
            total_weights += layer.weight.numel()
            remaining_weights += (layer.weight != 0).sum().item()
        sparsity = (remaining_weights / total_weights) * 100 if remaining_weights > 0 else 0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                images = images.float()  # Ensure images are of type float
                outputs = sparse_model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        sparsity_levels.append(sparsity)
        accuracies.append(accuracy)
        print(f'Epoch {epoch + 1} - Sparsity: {sparsity:.2f}% - Accuracy: {accuracy:.2f}% - Remaining Weights: {remaining_weights}/{total_weights}')

    manager.finalize(sparse_model)


Testing Sparsity Level: 60%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 56.00% - Remaining Weights: 1588/1588
Epoch 2 - Sparsity: 58.63% - Accuracy: 56.27% - Remaining Weights: 931/1588
Epoch 3 - Sparsity: 46.66% - Accuracy: 56.02% - Remaining Weights: 741/1588
Epoch 4 - Sparsity: 41.37% - Accuracy: 56.29% - Remaining Weights: 657/1588
Epoch 5 - Sparsity: 39.99% - Accuracy: 56.34% - Remaining Weights: 635/1588

Testing Sparsity Level: 70%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 56.05% - Remaining Weights: 1588/1588
Epoch 2 - Sparsity: 53.27% - Accuracy: 56.17% - Remaining Weights: 846/1588
Epoch 3 - Sparsity: 38.29% - Accuracy: 56.14% - Remaining Weights: 608/1588
Epoch 4 - Sparsity: 31.74% - Accuracy: 56.14% - Remaining Weights: 504/1588
Epoch 5 - Sparsity: 29.97% - Accuracy: 56.20% - Remaining Weights: 476/1588

Testing Sparsity Level: 75%

Training Sparse Model:
Epoch 1 - Sparsity: 100.00% - Accuracy: 56.03% - Remaining Weights: 1588/1588