In [1]:
# Prints a fixed greeting (presumably a quick check that the kernel executes cells).
print("hello")

hello


In [1]:

# Imports come first and the availability probe last: a notebook cell only
# displays the value of its final expression, so a probe placed before an
# import has its result silently discarded.
import torch
import pytorch_lightning as pl

torch.cuda.is_available()

In [6]:
import torch
import torchvision
import torchvision.transforms as transforms
import pytorch_lightning as pl
import torchvision.models as models
import wandb
from wandb import init
import numpy as np
import random

from pytorch_lightning.loggers import WandbLogger
import torch.nn as nn

# Seeding the RNGs alone did not make runs repeatable: cuDNN may autotune and
# select non-deterministic kernels, so its flags are pinned here as well.
def set_seed(seed: int = 42) -> None:
    """Seed every RNG used by this notebook and request deterministic cuDNN.

    Args:
        seed: Value applied to Python's ``random``, NumPy's legacy global
            generator, and PyTorch's CPU and CUDA generators.

    Note:
        Some CUDA kernels outside cuDNN have no deterministic implementation,
        so GPU runs may still show small run-to-run differences.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Documented as safe to call when CUDA is unavailable (silently ignored).
    torch.cuda.manual_seed_all(seed)
    # Force deterministic convolution algorithms and switch off the autotuner,
    # whose timing-based algorithm choice can differ from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Seed through Lightning first, then through the local helper; both calls
# read the same constant so they cannot drift apart.
SEED = 42
pl.seed_everything(SEED)
set_seed(SEED)

# Your code to define LightningModule and Trainer goes here...

# Step 1: Load and Subsample the CIFAR10 Data

NUM_CLASSES = 10         # CIFAR10 label ids are 0..9
SAMPLES_PER_CLASS = 100  # number of images kept for each class
BATCH_SIZE = 10

# ToTensor yields values in [0, 1]; normalising with mean 0.5 / std 0.5 per
# channel rescales them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Gather the first SAMPLES_PER_CLASS dataset positions of every class in a
# single pass over the labels (instead of one full scan per class), stopping
# as soon as every bucket is full.
per_class_indices = {class_id: [] for class_id in range(NUM_CLASSES)}
remaining = NUM_CLASSES * SAMPLES_PER_CLASS
for idx, label in enumerate(train_dataset.targets):
    bucket = per_class_indices.get(label)  # labels outside 0..NUM_CLASSES-1 are ignored
    if bucket is not None and len(bucket) < SAMPLES_PER_CLASS:
        bucket.append(idx)
        remaining -= 1
        if remaining == 0:
            break

# Concatenate class by class so the index order matches a per-class scan:
# all of class 0 first, then class 1, and so on.
indices = [idx for class_id in range(NUM_CLASSES) for idx in per_class_indices[class_id]]

subsampled_train_dataset = torch.utils.data.Subset(train_dataset, indices)
train_loader = torch.utils.data.DataLoader(subsampled_train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Step 2: Define the Lightning Module

class ResNet18Lightning(pl.LightningModule):
    """ResNet-18 classifier trained with cross-entropy loss and SGD.

    Args:
        num_classes: Number of outputs of the network's classification layer.
        lr: SGD learning rate.
        momentum: SGD momentum.
        weight_decay: Weight-decay coefficient passed to SGD.
    """

    def __init__(self, num_classes=10, lr=0.01, momentum=0.9, weight_decay=5e-4):
        super().__init__()
        # No `pretrained=` argument: torchvision deprecated it, and the default
        # already builds an untrained network, which is what
        # `pretrained=False` asked for.
        self.resnet18 = models.resnet18(num_classes=num_classes)
        self.criterion = nn.CrossEntropyLoss()
        # Optimizer hyperparameters, consumed by configure_optimizers().
        self.lr = lr
        self.momentum = momentum
        self.weight_decay = weight_decay

    def forward(self, x):
        """Return the wrapped ResNet-18's output for input batch ``x``."""
        return self.resnet18(x)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the loss for one batch.

        Args:
            batch: An ``(inputs, labels)`` pair.
            batch_idx: Index of the batch; unused.
        """
        inputs, labels = batch
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        self.log('train_loss', loss)
        return loss

    def configure_optimizers(self):
        """Build the SGD optimizer over all module parameters."""
        optimizer = torch.optim.SGD(
            self.parameters(),
            lr=self.lr,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
        )
        return optimizer

# Step 3: Train with wandb logging

wandb.init(project='resnet18_cifar10_subsampled_demo1')
wandb_logger = WandbLogger()

# Do not call `.cuda()` on the model: it raises on a CPU-only machine, which
# would defeat the CPU fallback chosen for `accelerator` below. The Trainer
# moves the model to the selected accelerator itself.
model = ResNet18Lightning()
trainer = pl.Trainer(max_epochs=20, accelerator="gpu" if torch.cuda.is_available() else "cpu", logger=wandb_logger)

trainer.fit(model, train_loader)

Global seed set to 42


Files already downloaded and verified


0,1
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
train_loss,▆█▃▃▂▃▂▂▃▂▂▄▁▁▂▂▁▂▁▂
trainer/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██

0,1
epoch,9.0
train_loss,1.59777
trainer/global_step,999.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016672793883238533, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | resnet18  | ResNet           | 11.2 M
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


In [None]:


import matplotlib.pyplot as plt

# Assuming model is an instance of ResNet18Lightning

# 1. Check the Magnitudes of the Weights:

print("Weight Magnitudes (L2 Norm) for each layer:")
# Frozen parameters (requires_grad == False) are left out of the report.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    l2_norm = param.data.norm()
    print(f"{name}: {l2_norm:.4f}")

# 2. Visualize Kernels:

def visualize_kernels(layer_weights, num_kernels=6):
    """Draw the leading kernels of a layer side by side in a single row.

    Args:
        layer_weights: Weight tensor, assumed to have shape
            (out_channels, in_channels, kernel_height, kernel_width).
        num_kernels: Number of columns in the row; at most this many kernels
            are drawn (fewer if the layer has fewer output channels).
    """
    kernels_to_draw = min(num_kernels, layer_weights.shape[0])
    for column in range(1, kernels_to_draw + 1):
        plt.subplot(1, num_kernels, column)
        # Average across input channels so each kernel becomes one 2-D image.
        kernel_image = layer_weights[column - 1].mean(0).cpu().detach().numpy()
        plt.imshow(kernel_image, cmap='viridis')
        plt.axis('off')
    plt.show()

# Show the kernels of the network's first convolution (`resnet18.conv1`).
print("\nVisualizing kernels of the first convolutional layer:")
stem_conv = model.resnet18.conv1
first_layer_weights = stem_conv.weight.data
visualize_kernels(first_layer_weights)

In [4]:
# Report the L2 norm of every trainable parameter tensor.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(f"{name}: {param.data.norm():.4f}")

resnet18.conv1.weight: 9.8421
resnet18.bn1.weight: 8.0017
resnet18.bn1.bias: 0.6670
resnet18.layer1.0.conv1.weight: 11.8414
resnet18.layer1.0.bn1.weight: 7.6199
resnet18.layer1.0.bn1.bias: 0.4097
resnet18.layer1.0.conv2.weight: 11.7206
resnet18.layer1.0.bn2.weight: 7.4228
resnet18.layer1.0.bn2.bias: 0.2968
resnet18.layer1.1.conv1.weight: 11.4897
resnet18.layer1.1.bn1.weight: 7.6158
resnet18.layer1.1.bn1.bias: 0.2748
resnet18.layer1.1.conv2.weight: 11.3751
resnet18.layer1.1.bn2.weight: 7.4234
resnet18.layer1.1.bn2.bias: 0.2078
resnet18.layer2.0.conv1.weight: 11.8333
resnet18.layer2.0.bn1.weight: 10.7678
resnet18.layer2.0.bn1.bias: 0.2659
resnet18.layer2.0.conv2.weight: 16.0075
resnet18.layer2.0.bn2.weight: 10.6751
resnet18.layer2.0.bn2.bias: 0.2699
resnet18.layer2.0.downsample.0.weight: 10.9528
resnet18.layer2.0.downsample.1.weight: 10.9444
resnet18.layer2.0.downsample.1.bias: 0.2699
resnet18.layer2.1.conv1.weight: 15.9238
resnet18.layer2.1.bn1.weight: 10.7672
resnet18.layer2.1.bn1.bias

In [5]:
# Print each trainable parameter's L2 norm and accumulate the sum of those
# norms in `count` (a running total, not a number of parameters).
count = 0
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    l2_norm = param.data.norm()
    print(f"{name}: {l2_norm:.4f}")
    count += l2_norm

print(count)

resnet18.conv1.weight: 9.8421
resnet18.bn1.weight: 8.0017
resnet18.bn1.bias: 0.6670
resnet18.layer1.0.conv1.weight: 11.8414
resnet18.layer1.0.bn1.weight: 7.6199
resnet18.layer1.0.bn1.bias: 0.4097
resnet18.layer1.0.conv2.weight: 11.7206
resnet18.layer1.0.bn2.weight: 7.4228
resnet18.layer1.0.bn2.bias: 0.2968
resnet18.layer1.1.conv1.weight: 11.4897
resnet18.layer1.1.bn1.weight: 7.6158
resnet18.layer1.1.bn1.bias: 0.2748
resnet18.layer1.1.conv2.weight: 11.3751
resnet18.layer1.1.bn2.weight: 7.4234
resnet18.layer1.1.bn2.bias: 0.2078
resnet18.layer2.0.conv1.weight: 11.8333
resnet18.layer2.0.bn1.weight: 10.7678
resnet18.layer2.0.bn1.bias: 0.2659
resnet18.layer2.0.conv2.weight: 16.0075
resnet18.layer2.0.bn2.weight: 10.6751
resnet18.layer2.0.bn2.bias: 0.2699
resnet18.layer2.0.downsample.0.weight: 10.9528
resnet18.layer2.0.downsample.1.weight: 10.9444
resnet18.layer2.0.downsample.1.bias: 0.2699
resnet18.layer2.1.conv1.weight: 15.9238
resnet18.layer2.1.bn1.weight: 10.7672
resnet18.layer2.1.bn1.bias

In [7]:
# Print each trainable parameter's L2 norm and accumulate the sum of those
# norms in `count` (a running total, not a number of parameters).
count = 0
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    l2_norm = param.data.norm()
    print(f"{name}: {l2_norm:.4f}")
    count += l2_norm

print(count)

resnet18.conv1.weight: 9.8440
resnet18.bn1.weight: 8.0611
resnet18.bn1.bias: 0.7251
resnet18.layer1.0.conv1.weight: 11.5586
resnet18.layer1.0.bn1.weight: 7.2694
resnet18.layer1.0.bn1.bias: 0.4568
resnet18.layer1.0.conv2.weight: 11.6408
resnet18.layer1.0.bn2.weight: 6.8925
resnet18.layer1.0.bn2.bias: 0.4752
resnet18.layer1.1.conv1.weight: 11.4017
resnet18.layer1.1.bn1.weight: 7.2609
resnet18.layer1.1.bn1.bias: 0.3661
resnet18.layer1.1.conv2.weight: 11.3618
resnet18.layer1.1.bn2.weight: 6.8239
resnet18.layer1.1.bn2.bias: 0.3658
resnet18.layer2.0.conv1.weight: 11.7671
resnet18.layer2.0.bn1.weight: 10.2493
resnet18.layer2.0.bn1.bias: 0.3135
resnet18.layer2.0.conv2.weight: 15.8296
resnet18.layer2.0.bn2.weight: 10.0152
resnet18.layer2.0.bn2.bias: 0.3624
resnet18.layer2.0.downsample.0.weight: 10.7954
resnet18.layer2.0.downsample.1.weight: 10.7080
resnet18.layer2.0.downsample.1.bias: 0.3624
resnet18.layer2.1.conv1.weight: 15.8265
resnet18.layer2.1.bn1.weight: 10.2464
resnet18.layer2.1.bn1.bias