In [1]:
import numpy as np
import pandas as pd
import torch

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget

# Ignore warnings: this blanket filter silences every warning category for the
# rest of the session.
import warnings
warnings.filterwarnings("ignore")

seed = 42  # single seed shared by the RNG seeding calls in this notebook
nthreads = 4  # worker count handed to the DataLoaders as num_workers

# Reproducibility

In [2]:
import os
import random

# Seed every global random number generator the notebook draws from.
np.random.seed(seed)
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this assignment
# can only affect interpreters launched after this point, not the running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
# Seeds torch's default generator, which is the one the samplers and random
# transforms use when no explicit generator is passed. A separately constructed
# torch.Generator would be discarded immediately and influence nothing.
# The trailing semicolon keeps the returned generator out of the cell output.
torch.manual_seed(seed);

<torch._C.Generator at 0x7fcb736b43b0>

# Device

In [3]:
# Choose the compute device once; the model and every batch are moved to it below.
on_gpu = torch.cuda.is_available()
device = torch.device('cuda') if on_gpu else torch.device('cpu')
if on_gpu:
    # Seed the generators of all visible CUDA devices as well.
    torch.cuda.manual_seed_all(seed)

# Data

In [4]:
from pathlib import Path
import torchvision.datasets as datasets
import torch.utils.data as data
    
def split_ds(ds, frac):
    """Randomly split ``ds`` into two disjoint subsets.

    Args:
        ds: any dataset supporting ``len()`` and integer indexing.
        frac: fraction of the samples assigned to the first subset; the size
            is truncated to an integer and the remainder goes to the second.

    Returns:
        The two subsets produced by ``data.random_split``, first part first.
    """
    total = len(ds)
    head = int(total * frac)
    return data.random_split(ds, [head, total - head])

def make_datasets(root, total_frac=0.75, val_frac=0.2):
    """Download FashionMNIST and carve it into train / val / test datasets.

    Args:
        root: directory the dataset is downloaded to / read from.
        total_frac: fraction of the official training split that is kept;
            the rest is discarded.
        val_frac: fraction of the kept samples used for validation; the
            remainder becomes the training set.

    Returns:
        dict with keys 'train', 'val' and 'test'. 'test' is the official
        FashionMNIST test split, untouched.
    """
    # Import the class locally instead of going through the module-level name
    # `datasets`: this notebook rebinds that name to the dict returned here,
    # after which `datasets.FashionMNIST` no longer resolves.
    from torchvision.datasets import FashionMNIST

    train_full_ds = FashionMNIST(root, train=True, download=True)
    test_ds = FashionMNIST(root, train=False, download=True)

    # Keep `total_frac` of the training split; the left-out part is not used.
    core_ds, _ = split_ds(train_full_ds, total_frac)
    # split_ds returns the `frac`-sized part first, hence validation comes first.
    val_ds, train_ds = split_ds(core_ds, val_frac)

    return {'train': train_ds, 'val': val_ds, 'test': test_ds}

class MappedDataset(data.Dataset):
    """View over another dataset that transforms the input of every sample.

    Each item of the wrapped dataset is expected to unpack into ``(X, y)``;
    ``transform`` (when truthy) is applied to ``X`` only and ``y`` is passed
    through unchanged.
    """

    def __init__(self, ds, transform=None):
        self.ds, self.transform = ds, transform

    def __len__(self):
        # Same number of samples as the wrapped dataset.
        return len(self.ds)

    def __getitem__(self, ix):
        X, y = self.ds[ix]
        return (self.transform(X) if self.transform else X), y

def augment_datasets(datasets, transforms=None):
    """Wrap each split of ``datasets`` in a ``MappedDataset``.

    The dict is updated in place and also returned. 'train' gets
    ``transforms['train']``; both 'val' and 'test' get ``transforms['val']``.
    When ``transforms`` is falsy the dict is returned untouched.
    """
    if not transforms:
        return datasets

    # (split to wrap, key of the transform applied to it)
    for split, transform_key in (('train', 'train'), ('val', 'val'), ('test', 'val')):
        datasets[split] = MappedDataset(datasets[split], transforms[transform_key])

    return datasets

# Build the train/val/test splits under ./data.
# NOTE(review): this rebinds `datasets`, which until this line was the alias of
# the torchvision.datasets module imported at the top of the cell; from here on
# the name refers to the dict of splits.
datasets = make_datasets('./data')

In [5]:
from torchvision.utils import make_grid
from torchvision import transforms

def plot_examples(ds, per_row, total):
    """Draw one randomly sampled batch of ``total`` images as a grid.

    Args:
        ds: dataset whose samples are ``(image_tensor, label)`` pairs.
        per_row: number of images per grid row.
        total: number of images to sample (used as the batch size).

    Prints the shape of the sampled batch and renders the grid on a new
    figure with the axes hidden.
    """
    loader = data.DataLoader(ds, batch_size=total, shuffle=True)
    images, _ = next(iter(loader))
    print(images.shape)

    # make_grid returns a channels-first image; imshow wants channels last.
    grid = make_grid(images, nrow=per_row)
    fig, ax = plt.subplots()
    ax.axis('off')
    ax.imshow(grid.permute((1, 2, 0)))

# Preview 32 randomly drawn training images, 8 per row, converted to tensors
# but otherwise untransformed.
plot_examples(MappedDataset(datasets['train'], transforms.ToTensor()), 8, 32)

torch.Size([32, 1, 28, 28])


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Transformations

In [6]:
# Training pipeline without its final normalisation step; it is only used to
# measure the pixel statistics that the normalisation needs.
# (A RandomCrop(28, padding=4) in front of ToTensor is currently disabled.)
train_head = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomErasing(),
])

# One pass over the randomly erased training images (DataLoader default batch
# size), concatenated along the batch axis, then reduced to scalar statistics.
ds = MappedDataset(datasets['train'], train_head)
ds_tensor = torch.cat([X for X, _ in data.DataLoader(ds)], dim=0)
mean, std = ds_tensor.mean(), ds_tensor.std()

# Training keeps the random erasing; validation/test only convert and normalise.
train_steps = [
    transforms.ToTensor(),
    transforms.RandomErasing(),
    transforms.Normalize((mean), (std)),
]
eval_steps = [
    transforms.ToTensor(),
    transforms.Normalize((mean), (std)),
]

# Wraps the splits with their transforms and rebinds `datasets` to the result.
datasets = augment_datasets(datasets, transforms={
    'train': transforms.Compose(train_steps),
    'val': transforms.Compose(eval_steps),
})

In [7]:
# Same 8-per-row grid of 32 samples, now drawn through the full training
# pipeline (random erasing + normalisation). Normalised pixels fall outside
# [0, 1], which is what triggers imshow's clipping notice in the output.
plot_examples(datasets['train'], 8, 32)

torch.Size([32, 1, 28, 28])


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


# Model

## ResNet18, reduced

In [8]:
from torchvision.models import resnet18
# Instantiate torchvision's ResNet-18 with its default arguments and display
# its layer structure as the cell output.
resnet = resnet18()
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
from torch.nn import Conv2d, Linear
# Swap the stem for a 1-input-channel convolution (other settings as in the
# printed original) and the classifier for a 10-output linear layer.
# NOTE(review): `resnet` is not referenced again in the cells visible here;
# the training cell below instantiates ReducedNet instead.
resnet.conv1 = Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
resnet.fc = Linear(in_features=512, out_features=10, bias=True)

In [10]:
import torch.nn as nn
import torch.nn.functional as func

class ReducedNet(nn.Module):
    """Small CNN mapping 1x28x28 images to 10 class scores.

    Three 5x5 convolutions (16, 32 and 64 channels, stride 1, padding 2), each
    followed by batch normalisation, feed a 256-128-10 fully connected head.
    Only the first block is max-pooled (2x2), so the head expects 64 feature
    maps of 14x14, i.e. a 28x28 input.

    Attribute names are part of the saved ``state_dict`` layout and must not
    be renamed.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the original order so seeded initialisation is
        # unchanged. The BatchNorm layers use the library defaults
        # (eps=1e-5, momentum=0.1, affine, running statistics tracked).
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(32)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm2d(64)

        self.fc1 = nn.Linear(64 * 14 * 14, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, 10)``.

        Raises:
            RuntimeError: if the input's spatial size does not produce the
                64x14x14 features the first linear layer expects.
        """
        x = self.pool(func.relu(self.bn1(self.conv1(x))))
        x = func.relu(self.bn2(self.conv2(x)))
        # No activation after the third block: its output goes straight to fc1.
        x = self.bn3(self.conv3(x))

        # Flatten per sample. The previous `view(-1, 64 * 14 * 14)` let a
        # wrongly sized input silently merge several samples into one row
        # instead of failing; keeping the batch axis makes fc1 reject it.
        x = torch.flatten(x, 1)
        x = func.relu(self.fc1(x))
        x = func.relu(self.fc2(x))
        return self.fc3(x)

In [11]:
def make_dataloaders(datasets, batch_size=4, num_workers=None, pin_memory=None):
    """Create one DataLoader per split.

    Args:
        datasets: dict with 'train', 'val' and 'test' datasets.
        batch_size: samples per batch for all three loaders (default 4, the
            value that used to be hard-coded).
        num_workers: loader worker processes; ``None`` falls back to the
            notebook-level ``nthreads`` setting.
        pin_memory: whether to pin host memory; ``None`` enables it only when
            CUDA is available, since pinning only serves host-to-GPU copies.

    Returns:
        dict with keys 'train', 'val' and 'test'. The train and val loaders
        shuffle; the test loader keeps dataset order.
    """
    if num_workers is None:
        num_workers = nthreads
    if pin_memory is None:
        pin_memory = torch.cuda.is_available()

    common = {
        'batch_size': batch_size,
        'pin_memory': pin_memory,
        'num_workers': num_workers,
    }

    return {
        'train': data.DataLoader(datasets['train'], **common, shuffle=True),
        'val': data.DataLoader(datasets['val'], **common, shuffle=True),
        # Order must stay fixed so predictions can be matched to sample ids.
        'test': data.DataLoader(datasets['test'], **common),
    }

# Module-level loaders; train_model and the prediction cell read this dict directly.
dataloaders = make_dataloaders(datasets)

In [12]:
import time
import copy
from tqdm.auto import tqdm

def report_time(since):
    """Print the wall-clock time elapsed since ``since`` as minutes and seconds.

    Args:
        since: a reference timestamp as returned by ``time.time()``.
    """
    minutes, seconds = divmod(time.time() - since, 60)
    print('Elapsed: {:.0f}m {:.0f}s'.format(minutes, seconds))

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders`` dict, moving each batch to the module-level
    ``device``. The scheduler is stepped once per epoch, after the training
    pass. The weights of the epoch with the highest validation accuracy are
    kept and restored before returning.

    Args:
        model: network to optimise; expected to already live on ``device``.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser over ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of train/val epochs to run.

    Returns:
        ``model`` with the best-validation-accuracy weights loaded.

    Raises:
        ValueError: if a phase's dataloader yields no samples.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            # Plain Python numbers, so the reported metrics (and best_acc)
            # are floats rather than 0-dim tensors.
            running_loss = 0.0
            running_corrects = 0
            items = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients only exist, and only need clearing, when training.
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight by batch size so a smaller last batch is averaged
                # correctly.
                batch_items = inputs.size(0)
                running_loss += loss.item() * batch_items
                running_corrects += (preds == labels).sum().item()
                items += batch_items

            if is_train:
                scheduler.step()

            if items == 0:
                raise ValueError(
                    "dataloader for phase '{}' produced no samples".format(phase))

            epoch_loss = running_loss / items
            epoch_acc = running_corrects / items

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        report_time(since)
        print()

    report_time(since)
    # '{:.4f}' (precision), not '{:4f}' (minimum width): matches the per-epoch lines.
    print('Best val Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

In [13]:
from torch.nn import CrossEntropyLoss
import torch.optim as optim
import torch.optim.lr_scheduler as sched
import uuid

# Fresh network, moved to the selected device.
model = ReducedNet()
model.to(device)

criterion = CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
scheduler = sched.StepLR(optimizer, step_size=7, gamma=0.1)
# optimizer = optim.AdamW(model.parameters())
# scheduler = sched.OneCycleLR(optimizer, max_lr=0.01, epochs=10,
#                              steps_per_epoch=len(datasets['train']))

# Random identifier naming this run's weight file (reused for the predictions CSV).
uid = str(uuid.uuid4())
print(uid)

# Create the output directory before training: otherwise a missing folder
# only surfaces at torch.save, after the whole run has completed.
Path('models').mkdir(parents=True, exist_ok=True)

model = train_model(model, criterion, optimizer, scheduler, num_epochs=25)
torch.save(model.state_dict(), Path('models', uid))

64eb2f0a-58a4-4955-ad41-4d534862017b
Epoch 1/25
----------
train Loss: 0.4885 Acc: 0.8213
val Loss: 0.3293 Acc: 0.8732
Elapsed: 3m 34s

Epoch 2/25
----------
train Loss: 0.3381 Acc: 0.8741
val Loss: 0.3402 Acc: 0.8744
Elapsed: 7m 10s

Epoch 3/25
----------
train Loss: 0.2936 Acc: 0.8893
val Loss: 0.2604 Acc: 0.9072
Elapsed: 10m 45s

Epoch 4/25
----------
train Loss: 0.2687 Acc: 0.8995
val Loss: 0.2672 Acc: 0.8982
Elapsed: 14m 21s

Epoch 5/25
----------
train Loss: 0.2358 Acc: 0.9107
val Loss: 0.2720 Acc: 0.9058
Elapsed: 18m 1s

Epoch 6/25
----------
train Loss: 0.2172 Acc: 0.9180
val Loss: 0.2541 Acc: 0.9141
Elapsed: 21m 40s

Epoch 7/25
----------
train Loss: 0.1967 Acc: 0.9258
val Loss: 0.2537 Acc: 0.9154
Elapsed: 25m 21s

Epoch 8/25
----------
train Loss: 0.1390 Acc: 0.9481
val Loss: 0.2324 Acc: 0.9244
Elapsed: 29m 2s

Epoch 9/25
----------
train Loss: 0.1232 Acc: 0.9542
val Loss: 0.2397 Acc: 0.9243
Elapsed: 32m 52s

Epoch 10/25
----------


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-0ffc2dafc7e8>", line 19, in <module>
    model = train_model(model, criterion, optimizer, scheduler, num_epochs=25)
  File "<ipython-input-12-4e3d35fe724e>", line 36, in train_model
    outputs = model(inputs)
  File "/home/talos/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "<ipython-input-10-4493359bce6d>", line 34, in forward
    x = func.relu(self.fc1(x))
  File "/home/talos/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/talos/.local/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 93, in forward
    return F.linear(input, self.weight, self.bias)
  File "/home/talos/.loc

TypeError: object of type 'NoneType' has no len()

# Predictions

In [None]:
# Predict a class for every test sample and write an (Id, Class) CSV named
# after the training run's `uid`.
model.eval()  # inference mode: batch norm uses its running statistics

batch_preds = []
with torch.no_grad():
    for images, _ in dataloaders['test']:
        # The model lives on `device`, so the batch has to be moved there too.
        outputs = model(images.to(device))
        _, predicted = torch.max(outputs, 1)
        # Bring results back to the CPU before converting to NumPy later.
        batch_preds.append(predicted.cpu())

# The test loader is not shuffled, so the position in the concatenated
# predictions is the sample id. Building the frame once replaces the per-row
# DataFrame.append, which is quadratic and no longer exists in pandas 2.x.
classes = torch.cat(batch_preds).numpy()
preds = pd.DataFrame({'Id': np.arange(len(classes)), 'Class': classes})

preds_path = Path('preds', '{}.csv'.format(uid))
preds_path.parent.mkdir(parents=True, exist_ok=True)
preds.to_csv(preds_path, index=False)