In [23]:
!pip install thop
!pip install wandb









[notice] A new release of pip is available: 25.0 -> 25.0.1








[notice] To update, run: python.exe -m pip install --upgrade pip










[notice] A new release of pip is available: 25.0 -> 25.0.1









[notice] To update, run: python.exe -m pip install --upgrade pip











In [24]:
import torch.nn.functional as F
import argparse
import torch
import torchvision
import torch.nn as nn
from thop import profile
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import sys
import torch.nn.utils.prune as prune

In [25]:
import wandb
wandb.login()




True

In [26]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, groups=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, 
                               padding=1, bias=False, groups=groups)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, 
                               padding=1, bias=False, groups=groups)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, 
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
    
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, groups=1):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, groups=groups)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, groups=groups)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, groups):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, groups=groups))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
    
class mini_ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, groups=1):
        super().__init__()  # ✅ Correction ici
        self.in_planes = 32  # Commence avec une largeur réduite

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1, groups=groups)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2, groups=groups)
        self.linear = nn.Linear(256 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, groups):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, groups=groups))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet8(groups=8):
    return ResNet(BasicBlock, [1, 1, 1, 1], groups=groups)  # 8 blocs seulement

def mini_ResNet8(groups=8):
    return mini_ResNet(BasicBlock, [1, 1, 1, 1], groups=groups)  # 8 blocs seulement



In [27]:
def train(data_path , batch_size, learning_rate, 
          weight_decay, epochs, save_path, 
          load_path, prate):
    
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Data preprocessing
    normalize_scratch = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        normalize_scratch,
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize_scratch,
    ])

    c10train = CIFAR10(data_path,train=True,download=True,transform=transform_train)
    c10test = CIFAR10(data_path,train=False,download=True,transform=transform_test)

    trainloader = DataLoader(c10train,batch_size=batch_size,shuffle=True)
    testloader = DataLoader(c10test,batch_size=batch_size)

    # Charger le modèle groupe_8.pth
    model = mini_ResNet8(groups=8)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)
    # Initialize Weights & Biases
    dict = {
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "epochs": epochs,
        "prate": prate,
    }
    wandb.init(project="deep-learning-lab3", config=dict , name="miniresnet_8", job_type="training_test")


    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Training accuracy
            _, predicted = outputs.max(1)
            correct_train += predicted.eq(labels).sum().item()
            total_train += labels.size(0)

        train_accuracy = 100 * correct_train / total_train
        train_loss = running_loss / len(trainloader)

        print(f"TRAINING: Epoch [{epoch+1}/{epochs}], Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Learning rate: {optimizer.param_groups[0]['lr']}")

        # Evaluate on test set
        model.eval()
        correct_test = 0
        total_test = 0
        test_loss = 0.0

        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()

                _, predicted = outputs.max(1)
                correct_test += predicted.eq(labels).sum().item()
                total_test += labels.size(0)

        test_accuracy = 100 * (correct_test / total_test)
        test_loss= test_loss / len(testloader)

        scheduler.step(test_loss / len(testloader))

        # number of parameters
        num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        # number operations
        input_tensor = torch.randn(1, 3, 32, 32).to(device)
        flops, _ = profile(model, inputs=(input_tensor,))
        
        print(f"Epoch {epoch+1}: "
              f"Train Loss: {train_loss:.4f}, "
              f"Train Acc: {train_accuracy:.2f}%, "
              f"Test Loss: {test_loss:.4f}, "
              f"Test Acc: {test_accuracy:.2f}% "
              f"Learning Rate: {optimizer.param_groups[0]['lr']}"
              f"Num params: {num_params}"
              f"Num operations: {flops}"
              
              )
        wandb.log({
            "Train Loss": train_loss,
            "Train Accuracy": train_accuracy,
            "Test Loss": test_loss,
            "Test Accuracy": test_accuracy,
            "Learning Rate": optimizer.param_groups[0]['lr'],
            "Num params": num_params,
            "Num operations": flops
        })
        
        
        
    print("Training complete my boss")
    
    # Save model along with training hyperparameters
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "epochs": epochs,
        "weight_decay": weight_decay,
        "learning_rate": learning_rate,
    }
    wandb.finish()
    model.half()
    torch.save(checkpoint, save_path)
    print(f"Model and training details saved to {save_path}")

train('/opt/img/effdl-cifar10/', 64, 0.001, 0.0005, 70, './miniresnet8.pth', './miniresnet8.pth', 0.2)

Files already downloaded and verified
Files already downloaded and verified
TRAINING: Epoch [1/70], Loss: 1.4746, Accuracy: 45.9360, Learning rate: 0.001
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
Epoch 1: Train Loss: 1.4746, Train Acc: 45.94%, Test Loss: 1.2484, Test Acc: 56.06% Learning Rate: 0.001Num params: 196778Num operations: 10865152.0
TRAINING: Epoch [2/70], Loss: 1.1369, Accuracy: 59.0820, Learning rate: 0.001
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_linear() for <class 'torch

0,1
Learning Rate,█████████▂▂▂▂▂▂▂▁▁▁▁▁▁▁██▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Num operations,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Num params,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Test Accuracy,▃▃▄▅▆▆▆▇████████████▁▂▃▅▆▆▇▇████████████
Test Loss,▇▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▅▄▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁
Train Accuracy,▁▅▅▆▆▆▇▇▇▇█████████████▁▃▅▆▆▇▇██████████
Train Loss,▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁█▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
Learning Rate,0.0
Num operations,10865152.0
Num params,196778.0
Test Accuracy,84.94
Test Loss,0.44329
Train Accuracy,85.348
Train Loss,0.42171


Model and training details saved to ./miniresnet8.pth
