<a href="https://colab.research.google.com/github/univai-courses-ghf/Double-Descent/blob/main/Double_Descent_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Check for GPU

In [None]:
!nvidia-smi

Mon Nov 21 00:41:22 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Imports

In [None]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Logging

In [None]:
# Directory that collects the training logs.
save_dir = './' + 'results/'
# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(save_dir, exist_ok=True)
# Log file appended to by the training cell.
# NOTE(review): the "64" presumably mirrors the model width k=64 - rename it
# when training a different width so runs do not share one log.
save_file = os.path.join(save_dir, 'log_64.txt')

# Resnet

In [None]:
class PreActBlock(nn.Module):
    '''Two-convolution residual block in pre-activation order.

    Each 3x3 convolution is preceded by BatchNorm + ReLU. When the block
    changes the spatial stride or the channel count, the skip path is a
    1x1 convolution applied to the activated input; otherwise the raw
    input is added back unchanged.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both 3x3 convolutions.
        stride: stride of the first 3x3 convolution (and of the 1x1
            projection, when one is created).
        **kwargs: accepted and ignored.
    '''
    # Output channels = expansion * planes.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, **kwargs):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)

        # The `shortcut` attribute only exists when a projection is needed;
        # forward() relies on its absence to pick the identity skip.
        keeps_shape = stride == 1 and in_planes == out_planes
        if not keeps_shape:
            projection = nn.Conv2d(in_planes, out_planes,
                                   kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection)

    def forward(self, x):
        '''Return conv2(relu(bn2(conv1(relu(bn1(x)))))) plus the skip connection.'''
        activated = F.relu(self.bn1(x))
        if hasattr(self, 'shortcut'):
            # Projection skip: fed with the pre-activated input.
            residual = self.shortcut(activated)
        else:
            # Identity skip: the untouched block input.
            residual = x
        hidden = self.conv1(activated)
        hidden = F.relu(self.bn2(hidden))
        result = self.conv2(hidden)
        result += residual
        return result

class PreActResNet(nn.Module):
    '''Four-stage residual network with a configurable base width.

    Layout: 3x3 stem convolution -> layer1..layer4 -> 4x4 average pool ->
    linear classifier. The stages use 1x, 2x, 4x and 8x `init_channels`
    channels, and the first block of stages 2-4 is built with stride 2.

    Args:
        block: residual block class; must expose an `expansion` attribute
            and accept `(in_planes, planes, stride)`.
        num_blocks: sequence whose first four entries give the number of
            blocks in each stage.
        num_classes: size of the classifier output.
        init_channels: channel count of the stem and of the first stage.
    '''

    def __init__(self, block, num_blocks, num_classes=10, init_channels=64):
        super().__init__()
        width = init_channels
        # Running input-channel count; _make_layer advances it block by block.
        self.in_planes = width

        self.conv1 = nn.Conv2d(3, width, kernel_size=3,
                               stride=1, padding=1, bias=False)

        # (width multiplier, stride of the stage's first block) per stage.
        stage_plan = ((1, 1), (2, 2), (4, 2), (8, 2))
        for stage_no, (multiplier, first_stride) in enumerate(stage_plan, start=1):
            stage = self._make_layer(block, multiplier * width,
                                     num_blocks[stage_no - 1], stride=first_stride)
            setattr(self, 'layer%d' % stage_no, stage)

        self.linear = nn.Linear(8 * width * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        '''Build one stage: the first block uses `stride`, the rest use 1.'''
        # e.g. [2, 1, 1, ..., 1] - only the leading block may downsample.
        per_block_strides = [stride, *([1] * (num_blocks - 1))]
        stage_blocks = []
        for block_stride in per_block_strides:
            stage_blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        '''Map an image batch to class logits of shape (batch, num_classes).'''
        features = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        # Fixed 4x4 pooling window, then flatten everything but the batch axis.
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

def make_resnet18k(k=64, num_classes=10) -> PreActResNet:
    '''Build a ResNet18-style PreActResNet of width k (k=64 is the standard ResNet18).

    Args:
        k: channel count of the first stage, passed on as `init_channels`.
        num_classes: number of output classes.
    '''
    blocks_per_stage = [2, 2, 2, 2]
    model = PreActResNet(
        PreActBlock,
        blocks_per_stage,
        num_classes=num_classes,
        init_channels=k,
    )
    return model


# Training and Logging

Modify the `k` argument passed to `make_resnet18k()` in the cell below to experiment with different model widths.

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
num_classes = 10  # number of CIFAR-10 classes (matches `classes` below)

# Data
print('==> Preparing data..')
# Train-time augmentation: random 32x32 crop after 4-pixel padding + random horizontal flip.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# No augmentation at test time.
transform_test = transforms.Compose([
    transforms.ToTensor(),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)

## Label noise on the train set
## Set label_noise_fraction = 0 to turn off label noise
label_noise_fraction = 0.2  # fraction of training labels replaced by a wrong class
label_noise_seed = 0        # fixed so every run / model width trains on the same noisy labels
# A local Generator keeps the corrupted label set reproducible without
# touching NumPy's global random state.
noise_rng = np.random.default_rng(label_noise_seed)
num_samples = len(trainset.targets)
noisy_indices = noise_rng.choice(
    num_samples, round(label_noise_fraction * num_samples), replace=False)
for idx in noisy_indices:
    clean_label = trainset.targets[idx]
    # Draw uniformly from the other classes so the new label is always wrong.
    wrong_labels = [c for c in range(num_classes) if c != clean_label]
    # int(...) stores a plain Python int rather than a NumPy scalar.
    trainset.targets[idx] = int(noise_rng.choice(wrong_labels))

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# Model
print('==> Building model..')
# k sets the network width; change it to train a different model size.
net = make_resnet18k(k=64, num_classes=num_classes)
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4)
# optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

# Training
def train(epoch):
    '''Run one optimisation pass over `trainloader` and report epoch metrics.

    Uses the notebook-level `net`, `trainloader`, `criterion`, `optimizer`
    and `device`.

    Args:
        epoch: current epoch number (not used inside the function).

    Returns:
        (mean training loss per sample, training error rate) where the
        error rate is 1 - accuracy, both measured on the batches as they
        were seen during this pass.

    Raises:
        ValueError: if `trainloader` yields no samples.
    '''
    net.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    ## Flooding level
    ## Comment in below to use flooding
    # b = 0.1
    ## Comment in above to use flooding
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        ## Comment in below to use flooding
        # loss = (criterion(outputs, targets) - b).abs() + b
        ## Comment in above to use flooding
        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        # Weight each batch by its size so a shorter final batch is not
        # over-represented. Assumes `criterion` returns the batch mean,
        # as nn.CrossEntropyLoss() does by default.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(targets).sum().item()
    if total == 0:
        raise ValueError('trainloader yielded no samples')
    return loss_sum/total, 1-correct/total

def test(epoch):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        return test_loss/(batch_idx+1), 1-correct/total

# Write the CSV header only when the log is new or empty, so re-running this
# cell (or resuming with a non-zero start_epoch) does not insert extra header
# rows in the middle of the data.
log_header = 'Epoch,Train Loss,Train Error,Test Loss,Test Error\n'
if not os.path.exists(save_file) or os.path.getsize(save_file) == 0:
    with open(save_file, 'a') as f:
        f.write(log_header)

num_epochs = 500  # number of epochs to run in this session
for epoch in range(start_epoch+1, start_epoch+num_epochs+1):
    train_loss, train_error = train(epoch)
    test_loss, test_error = test(epoch)
    print(f'Epoch: {epoch:03} | Train Loss: {train_loss:.04} | '
          f'Train Error: {train_error:.04} | Test Loss: {test_loss:.04} | '
          f'Test Error: {test_error:.04}')
    # Append after every epoch so an interrupted run keeps its finished epochs.
    with open(save_file, 'a') as f:
        f.write(f'{epoch},{train_loss:.09},{train_error:.09},{test_loss:.09},{test_error:.09}\n')

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
==> Building model..
Epoch: 001 | Train Loss: 1.878 | Train Error: 0.6403 | Test Loss: 1.304 | Test Error: 0.4618
Epoch: 002 | Train Loss: 1.644 | Train Error: 0.5178 | Test Loss: 1.169 | Test Error: 0.3889
Epoch: 003 | Train Loss: 1.536 | Train Error: 0.4665 | Test Loss: 1.069 | Test Error: 0.3479
Epoch: 004 | Train Loss: 1.445 | Train Error: 0.4227 | Test Loss: 1.084 | Test Error: 0.3423
Epoch: 005 | Train Loss: 1.386 | Train Error: 0.3962 | Test Loss: 0.8539 | Test Error: 0.257
Epoch: 006 | Train Loss: 1.345 | Train Error: 0.3777 | Test Loss: 0.8156 | Test Error: 0.236
Epoch: 007 | Train Loss: 1.303 | Train Error: 0.36 | Test Loss: 0.7961 | Test Error: 0.221
Epoch: 008 | Train Loss: 1.273 | Train Error: 0.3482 | Test Loss: 0.7168 | Test Error: 0.201
Epoch: 009 | Train Loss: 1.247 | Train Error: 0.3349 | Test Loss: 0.8081 | Test Error: 0.2343
Epoch: 010 | Train Loss: 1.222 | Train Error: 0.3264 |