In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
# AlexNet.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class AlexNet(nn.Module):
    """
    AlexNet-style convolutional classifier.

    Five convolutional layers (with three max-pool stages) are followed by
    three fully connected layers. The first dense layer expects 9216
    (= 256 * 6 * 6) flattened features, which is what an input batch of
    size (bs, 3, 227, 227) produces.

    :param num_classes: number of output classes, 1000 by default
    """
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        # convolution part
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=96,
            kernel_size=11,
            stride=4,
            padding=0
        )
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=96,
            out_channels=256,
            kernel_size=5,
            stride=1,
            padding=2
        )
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(
            in_channels=256,
            out_channels=384,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.conv4 = nn.Conv2d(
            in_channels=384,
            out_channels=384,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.conv5 = nn.Conv2d(
            in_channels=384,
            out_channels=256,
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        # dense part
        self.fc1 = nn.Linear(
            in_features=9216,
            out_features=4096
        )
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(
            in_features=4096,
            out_features=4096
        )
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(
            in_features=4096,
            out_features=num_classes
        )

    def forward(self, image):
        """
        Run a batch of images through the network.

        :param image: 4-D tensor (bs, channels, height, width); the layer
            sizes are laid out for (bs, 3, 227, 227)
        :return: tensor of size (bs, num_classes) holding softmax
            probabilities (each row sums to 1), not raw logits
        :raises ValueError: if the input is not a 4-D tensor
        """
        if image.dim() != 4:
            raise ValueError(
                f'expected a 4-D (bs, c, h, w) batch, got {image.dim()} dimensions'
            )
        bs = image.size(0)
        x = F.relu(self.conv1(image))  # size: (bs, 96, 55, 55)
        x = self.pool1(x)  # size: (bs, 96, 27, 27)
        x = F.relu(self.conv2(x))  # size: (bs, 256, 27, 27)
        x = self.pool2(x)  # size: (bs, 256, 13, 13)
        x = F.relu(self.conv3(x))  # size: (bs, 384, 13, 13)
        x = F.relu(self.conv4(x))  # size: (bs, 384, 13, 13)
        x = F.relu(self.conv5(x))  # size: (bs, 256, 13, 13)
        x = self.pool3(x)  # size: (bs, 256, 6, 6)
        x = x.view(bs, -1)  # size: (bs, 9216)
        x = F.relu(self.fc1(x))  # size: (bs, 4096)
        # dropout does not change size; it is used for regularization.
        # p=0.5 means that, while training, each node of the current
        # layer is zeroed with probability 0.5
        x = self.dropout1(x)  # size: (bs, 4096)
        x = F.relu(self.fc2(x))  # size: (bs, 4096)
        x = self.dropout2(x)  # size: (bs, 4096)
        # the final linear layer maps the 4096 hidden features to one
        # score per class (1000 classes for the ImageNet dataset)
        x = self.fc3(x)  # size: (bs, num_classes)
        # softmax is an activation function that converts
        # linear output to probabilities that add up to 1
        # for each sample in the batch
        x = torch.softmax(x, dim=1)  # size: (bs, num_classes)
        return x


In [2]:
# dataset.py
# import torch
# import numpy as np
from PIL import Image
from PIL import ImageFile

# Some image files are truncated, i.e. they are missing their
# trailing bytes. Setting this flag takes care of those kinds of
# (corrupt) images so that PIL loads them instead of failing.
ImageFile.LOAD_TRUNCATED_IMAGES = True


class ClassificationDataset:
    """
    Generic image classification dataset that pairs every image path
    with its target and serves both as tensors. It can be used for
    binary, multi-class and multi-label classification problems.
    """
    def __init__(self, image_paths, targets, resize=None, augmentations=None):
        """
        Store the inputs; nothing is read from disk at this point.

        :param image_paths: list of paths to the images
        :param targets: numpy array of targets, indexed like image_paths
        :param resize: optional tuple, e.g. (256, 256); images are
            resized when it is not None
        :param augmentations: optional albumentations pipeline
        """
        self.image_paths, self.targets = image_paths, targets
        self.resize, self.augmentations = resize, augmentations

    def __len__(self):
        """
        :return: number of samples, i.e. the number of image paths
        """
        return len(self.image_paths)

    def __getitem__(self, item):
        """
        Load, preprocess and return the sample stored at index 'item'.

        :param item: index of the sample
        :return: dict with a float 'image' tensor in CHW layout and
            a long 'targets' tensor
        """
        # open with PIL and force three channels, since some of the
        # images are single channel
        picture = Image.open(self.image_paths[item]).convert('RGB')
        label = self.targets[item]

        if self.resize is not None:
            # the tuple is passed to PIL in swapped order
            # (presumably resize is (height, width) while PIL takes
            # (width, height) -- verify against the caller)
            new_size = (self.resize[1], self.resize[0])
            picture = picture.resize(new_size, resample=Image.BILINEAR)

        pixels = np.array(picture)

        # albumentations pipelines take and return the image
        # under the 'image' key
        if self.augmentations is not None:
            pixels = self.augmentations(image=pixels)['image']

        # pytorch expects CHW instead of HWC
        chw = np.transpose(pixels, (2, 0, 1)).astype(np.float32)

        # note the dtypes: float for the image, long for the targets
        image_tensor = torch.tensor(chw, dtype=torch.float)
        target_tensor = torch.tensor(label, dtype=torch.long)
        return {'image': image_tensor, 'targets': target_tensor}


In [36]:
# engine.py
# import torch
import torch.nn as nn

from tqdm import tqdm


def train(data_loader, model, optimizer, device):
    """
    Train the model for one epoch, i.e. one pass over data_loader.

    :param data_loader: pytorch dataloader yielding dicts with the
        keys 'image' and 'targets'
    :param model: pytorch model
    :param optimizer: optimizer, e.g. adam, sgd, etc.
    :param device: cuda/cpu
    :return: None
    """
    # the loss holds no state, so a single instance serves every batch
    criterion = nn.BCEWithLogitsLoss()

    # training mode
    model.train()

    for batch in data_loader:
        # pull image and targets out of the batch and move both,
        # as floats, to the cuda/cpu device
        images = batch['image'].to(device, dtype=torch.float)
        labels = batch['targets'].to(device, dtype=torch.float)

        # clear the gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass; targets are reshaped to a single column
        # for the loss
        logits = model(images)
        batch_loss = criterion(logits, labels.view(-1, 1))

        # backward pass followed by the parameter update
        batch_loss.backward()
        optimizer.step()
        # no learning rate scheduler is used here; if you have one,
        # step it either at this point or after the epoch


def evaluate(data_loader, model, device):
    """
    Run the model over data_loader once and collect its predictions.

    :param data_loader: pytorch dataloader yielding dicts with the
        keys 'image' and 'targets'
    :param model: pytorch model
    :param device: cuda/cpu
    :return: tuple (outputs, targets) of python lists accumulated
        over all batches
    """
    # evaluation mode
    model.eval()

    collected_outputs, collected_targets = [], []

    # gradients are not needed here
    with torch.no_grad():
        for batch in data_loader:
            images = batch['image'].to(device, dtype=torch.float)
            labels = batch['targets'].to(device, dtype=torch.float)

            # forward step generates the predictions
            preds = model(images)

            # append this batch, converted to plain python lists
            collected_targets += labels.detach().cpu().numpy().tolist()
            collected_outputs += preds.detach().cpu().numpy().tolist()

    # outputs first, targets second
    return collected_outputs, collected_targets

In [5]:
!pip install pretrainedmodels

Collecting pretrainedmodels
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[K     |████████████████████████████████| 58 kB 872 kB/s eta 0:00:01
Building wheels for collected packages: pretrainedmodels
  Building wheel for pretrainedmodels (setup.py) ... [?25ldone
[?25h  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-py3-none-any.whl size=60963 sha256=6c9975427f4d3122c696626121f5d6997db00891369ef4f7520a2323779bef2d
  Stored in directory: /root/.cache/pip/wheels/ed/27/e8/9543d42de2740d3544db96aefef63bda3f2c1761b3334f4873
Successfully built pretrainedmodels
Installing collected packages: pretrainedmodels
Successfully installed pretrainedmodels-0.7.4


In [6]:
# model.py
# import torch.nn as nn
import pretrainedmodels


def _make_single_logit_head(in_features):
    """
    Build the head that replaces a backbone's ``last_linear`` layer.

    :param in_features: number of features the backbone feeds into the head
    :return: nn.Sequential mapping in_features -> 2048 -> 1 output
    """
    return nn.Sequential(
        nn.BatchNorm1d(in_features),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=in_features, out_features=2048),
        nn.ReLU(),
        nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=2048, out_features=1)
    )


def _get_backbone_with_head(arch, pretrained, in_features):
    """
    Fetch an architecture from pretrainedmodels and swap its last layer.

    :param arch: architecture name looked up in pretrainedmodels
    :param pretrained: truthy to request the 'imagenet' weights,
        falsy to request no pretrained weights
    :param in_features: input width of the replacement head
    :return: the model with ``last_linear`` replaced
    """
    weights = 'imagenet' if pretrained else None
    model = pretrainedmodels.__dict__[arch](pretrained=weights)
    # print the model here to know what's going on
    model.last_linear = _make_single_logit_head(in_features)
    return model


def get_model(pretrained):
    """
    AlexNet from pretrainedmodels with a single-output head.

    :param pretrained: use imagenet weights if truthy
    :return: the model; its head takes 4096 features
    """
    return _get_backbone_with_head('alexnet', pretrained, in_features=4096)


def get_resnet18_model(pretrained):
    """
    ResNet18 from pretrainedmodels with a single-output head.

    :param pretrained: use imagenet weights if truthy
    :return: the model; its head takes 512 features
    """
    return _get_backbone_with_head('resnet18', pretrained, in_features=512)

In [39]:
# train.py
import os

import pandas as pd
import numpy as np

import albumentations
import torch

from sklearn import metrics
from sklearn.model_selection import train_test_split

# location of train.csv and train_png folder
# with all the png images
data_path = "../input/siim-png-train-csv/"

# cuda/cpu device
device = "cuda" if torch.cuda.is_available() else "cpu"

# let's train for 10 epochs
epoches = 10

# seed numpy and torch before the model is built so that weight
# initialisation, dropout and the train/valid split repeat from
# run to run (as far as the backend allows)
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# load the dataframe
# https://www.kaggle.com/abhishek/train-your-own-mask-rcnn/data?select=train-rle.csv
df = pd.read_csv(os.path.join(data_path, 'train.csv'))

# fetch all image ids
images = df.ImageId.values.tolist()

# a list with image locations
# https://www.kaggle.com/abhishek/siim-png-images
images = [
    os.path.join("../input/siim-png-images/", 'train_png', image + '.png') for image in images
]

# binary target numpy array
targets = df.target.values

# fetch our model, we will try both pretrained
# and non-pretrained weights
model = get_model(pretrained=True)

# move model to device
model.to(device)

# mean and std values of RGB channels for imagenet dataset
# we use these pre-calculated values when we use weights
# from imagenet
# when we do not use imagenet weights, we use the mean and
# standard deviation values of the original dataset
# please note that this is a separate calculation
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# albumentation is an image augmentation library
# that allows you to do many different types of image
# augmentations. here, i am using only normalization
# notice always_apply=True. we always want to apply
# normalization
aug = albumentations.Compose(
    [
        albumentations.Normalize(
            mean, std, max_pixel_value=255.0, always_apply=True
        )
    ]
)

# instead of using kfold, i am using train_test_split
# with a fixed random state
train_images, valid_images, train_targets, valid_targets = train_test_split(
    images, targets, stratify=targets, random_state=seed
)

# fetch the ClassificationDataset class
train_dataset = ClassificationDataset(
    image_paths=train_images,
    targets=train_targets,
    resize=(227, 227),
    augmentations=aug
)

# torch dataloader creates batches of data
# from classification dataset class
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

# same for validation
valid_dataset = ClassificationDataset(
    image_paths=valid_images,
    targets=valid_targets,
    resize=(227, 227),
    augmentations=aug
)

valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4
)

# simple Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

# train and print auc score for all epochs
for epoch in range(epoches):
    train(train_loader, model, optimizer, device=device)
    # use a separate name for the targets returned by evaluate so
    # the valid_targets array from the split is not overwritten
    predictions, epoch_targets = evaluate(
        valid_loader, model, device=device
    )
    # the model emits one value per sample, so predictions is a list
    # of one-element lists; flatten it to 1-D for the metric
    roc_auc = metrics.roc_auc_score(epoch_targets, np.ravel(predictions))
    print(
        f'Epoch={epoch}, Valid ROC AUC={roc_auc}'
    )

Epoch=0, Valid ROC AUC=0.57624287902239
Epoch=1, Valid ROC AUC=0.511279304392924
Epoch=2, Valid ROC AUC=0.5893466933542945
Epoch=3, Valid ROC AUC=0.6132119964668605
Epoch=4, Valid ROC AUC=0.5969652277497306
Epoch=5, Valid ROC AUC=0.585839080087194
Epoch=6, Valid ROC AUC=0.6002795718094374
Epoch=7, Valid ROC AUC=0.672852361774025


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f063634e680>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1177, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/conda/lib/python3.7/multiprocessing/process.py", line 138, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f063634e680>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1177, in _shutdown_workers
    w.join

Epoch=8, Valid ROC AUC=0.5801560740014424
Epoch=9, Valid ROC AUC=0.6138327269191186
