In [None]:
# Install prerequisites

# Install PyCocoTools needed for FasterRCNN
# (installed from the PythonAPI sub-directory of a GitHub fork, branch `cocodataset-master`)
!pip install git+https://github.com/gautamchitnis/cocoapi.git@cocodataset-master#subdirectory=PythonAPI

# Copy useful functions from pytorch vision tools into the working directory so
# they can be imported as plain modules
# (presumably the source of the `utils` and `engine` imports below — confirm)
%cp ../input/pytorch-vision-tools/references/detection/*.* .

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image, ImageFont, ImageDraw, ImageEnhance
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from os import path
import torch
import torchvision.utils
import utils
from engine import train_one_epoch, evaluate
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim import Adam
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import pickle
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Display name for every label id used by the detector.
# The position of a name in the list is its label id: 0 is 'No finding',
# 1-14 are the abnormality classes.
class_dictionary = dict(enumerate([
    'No finding',
    'Aortic enlargement',
    'Atelectasis',
    'Calcification',
    'Cardiomegaly',
    'Consolidation',
    'ILD',
    'Infiltration',
    'Lung Opacity',
    'Nodule/Mass',
    'Other lesion',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary fibrosis',
]))

In [None]:
# Define a dataset class which defines how to load images,targets for training and validation
class VinBigDataset(Dataset):
    """Detection dataset yielding one ``(image, target)`` pair per unique image id.

    Args:
        img_dir: Directory containing one ``<image_id>.png`` file per image.
        df: Annotation table with the columns ``image_id``, ``class_id``,
            ``x_min``, ``y_min``, ``x_max`` and ``y_max`` (one row per box).
        transforms: Callable invoked as
            ``transforms(image=<ndarray>, bboxes=<tensor>, labels=<tensor>)``
            that returns a mapping with ``'image'`` and ``'bboxes'`` entries,
            or ``None`` to return the PIL image untouched.
        class_id: When given, only annotations of this class are kept, and
            therefore only images that carry at least one such annotation.
    """

    def __init__(self, img_dir, df, transforms, class_id=None):
        self.img_dir = img_dir
        # Compare against None explicitly: class id 0 is a legitimate filter
        # value, and a plain truthiness test would silently skip the filter.
        if class_id is not None:
            df = df[df["class_id"] == class_id]
        self.df = df
        self.imgs = df["image_id"].unique()
        self.transforms = transforms

    def __len__(self):
        """Return the number of distinct images in the dataset."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load the ``idx``-th image together with its detection target.

        Returns:
            A tuple ``(img, target)``. ``target`` is a dict with the keys
            ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
            ``img`` is the transformed image when ``transforms`` is set,
            otherwise the RGB PIL image.
        """
        # The dataset iterates over image ids, not over annotation rows
        img_id = self.imgs[idx]

        # Get the rows containing annotations for this image
        data_rows = self.df[self.df["image_id"] == img_id]
        boxes = data_rows[["x_min", "y_min", "x_max", "y_max"]].values

        # Convert into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # Load the image
        img_path = path.join(self.img_dir, f"{img_id}.png")
        img = Image.open(img_path).convert("RGB")

        # Compute the area of every annotated box: height * width
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Mark every instance as "not crowd"
        num_objs = len(boxes)
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        labels = torch.tensor(data_rows["class_id"].values, dtype=torch.int64)

        # Define the target for this training data point
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            to_transform = {
                'image': np.array(img),
                'bboxes': target['boxes'],
                'labels': labels
            }
            transformed = self.transforms(**to_transform)
            img = transformed['image']
            target['boxes'] = torch.tensor(transformed['bboxes'])

        return img, target

In [None]:
# Function to create transforms for preprocessing
def create_transforms(train):
    """Build the albumentations preprocessing pipeline.

    The training and evaluation pipelines are currently the same, so
    ``train`` does not change the result: a ``Normalize`` step with zero
    mean, unit std and ``max_pixel_value=255.0``, followed by ``ToTensorV2``.
    """
    steps = [
        A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps)
#     transforms.Compose([
#             transforms.ToTensor(),
#        #     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
#         ])

In [None]:
# Function to create an instance of the model
def create_model():
    """Return a Faster R-CNN (ResNet-50 FPN) detector with a 15-class box head."""
    # 14 abnormalities plus the "no abnormalities" class
    n_outputs = 15

    # Start from the resnet50 detector pre-trained on COCO
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Swap the pre-trained box predictor for a fresh one that keeps the same
    # input width but emits `n_outputs` classes
    head_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_inputs, n_outputs)

    return detector

# Load training data

In [None]:
# Load the training data csv file
# NOTE(review): `data_dir` is not read by any active code in the visible notebook.
data_dir = "../input/vinbig1024stratified/"

df_train = pd.read_csv(f"../input/vinbig1024stratified-anns-removed/train_sup.csv")
# Replace every missing value with 0 (presumably the box coordinates of the
# rows without a finding — confirm against the csv).
df_train.fillna(0, inplace=True)
# Give class-14 rows a lower-right corner of (1, 1). Class 14 is the id that
# becomes 0 ('No finding' in class_dictionary) in the remap below.
df_train.loc[df_train["class_id"] == 14, ['x_max', 'y_max']] = 1.0

# FasterRCNN handles class_id==0 as the background.
# Shift every id up by one, then move the former class 14 (now 15) to 0.
df_train["class_id"] = df_train["class_id"] + 1
df_train.loc[df_train["class_id"] == 15, ["class_id"]] = 0

# Print out the total number of images and the total number of annotations
print(f"{df_train['image_id'].nunique()} images")
print(f"{len(df_train.index)} annotations")

In [None]:
# Keep a single class-0 row per image (class 0 is 'No finding' in
# class_dictionary); rows of every other class are kept unchanged.
df_train_0 = df_train[df_train['class_id'] == 0]
df_train_no_0 = df_train[df_train['class_id'] != 0]
df_train_0_no_duplicates = df_train_0.drop_duplicates(subset=['image_id', 'class_id'], keep='first')
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# ignore_index=True already produces a fresh 0..n-1 index, so no separate
# reset_index call is needed.
df_train = pd.concat([df_train_no_0, df_train_0_no_duplicates], ignore_index=True)
df_train.head(10)

In [None]:
# df_train_14_no_duplicates = df_train_14.drop_duplicates(subset=['image_id', 'class_id'], keep='first')
# df_train = df_train_no14.append(df_train_14_no_duplicates, ignore_index=True)

In [None]:
# Create the training dataset together with its preprocessing transforms

train_img_dir = '../input/vinbig1024stratified/output/dataset/train'
# train_img_dir = path.join(data_dir, 'train/train')
train_dataset = VinBigDataset(train_img_dir, df_train, create_transforms(train=True))

# Create the data loader for the training set. The collate function
# defines how to form a minibatch from the individual data items. In our case we
# just want to collate them into a single list.

data_loader_train = DataLoader(\
    train_dataset, batch_size=5, shuffle=True, num_workers=4, collate_fn=utils.collate_fn)

# Print the number of elements in the training set
print(f"{len(train_dataset)} items in the training set")

# Load validation data

In [None]:
# Load the VALIDATION data csv file
# NOTE(review): `data_dir` is not read by any active code in the visible notebook.
data_dir = "../input/vinbig1024stratified/"

df_val = pd.read_csv(f"../input/vinbig1024stratified-anns-removed/val_sup.csv")
# Replace every missing value with 0 (presumably the box coordinates of the
# rows without a finding — confirm against the csv).
df_val.fillna(0, inplace=True)
# Give class-14 rows a lower-right corner of (1, 1). Class 14 is the id that
# becomes 0 ('No finding' in class_dictionary) in the remap below.
df_val.loc[df_val["class_id"] == 14, ['x_max', 'y_max']] = 1.0

# FasterRCNN handles class_id==0 as the background.
# Shift every id up by one, then move the former class 14 (now 15) to 0.
df_val["class_id"] = df_val["class_id"] + 1
df_val.loc[df_val["class_id"] == 15, ["class_id"]] = 0

# Print out the total number of images and the total number of annotations
print(f"{df_val['image_id'].nunique()} images")
print(f"{len(df_val.index)} annotations")

In [None]:
# Don't remove no abnormality
# df_val = df_val[df_val['class_id'] != 0]
# df_val.reset_index(drop=True, inplace=True)

# Keep a single class-0 row per image (class 0 is 'No finding' in
# class_dictionary); rows of every other class are kept unchanged.
df_val_0 = df_val[df_val['class_id'] == 0]
df_val_no_0 = df_val[df_val['class_id'] != 0]
df_val_0_no_duplicates = df_val_0.drop_duplicates(subset=['image_id', 'class_id'], keep='first')
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# ignore_index=True already produces a fresh 0..n-1 index, so no separate
# reset_index call is needed.
df_val = pd.concat([df_val_no_0, df_val_0_no_duplicates], ignore_index=True)
df_val.head(10)

In [None]:
# Create the validation dataset together with its preprocessing transforms

val_img_dir = '../input/vinbig1024stratified/output/dataset/validation/'
# train_img_dir = path.join(data_dir, 'train/train')
val_dataset = VinBigDataset(val_img_dir, df_val, create_transforms(train=False))

# Create the data loader for the validation set. The collate function
# defines how to form a minibatch from the individual data items. In our case we
# just want to collate them into a single list. The order is kept fixed (shuffle=False).

data_loader_val = DataLoader(val_dataset, batch_size=5, shuffle=False, num_workers=4, collate_fn=utils.collate_fn)

# Print the number of elements in the validation set
print(f"{len(val_dataset)} items in the validation set")

# Run the notebook

In [None]:
# Build the detector; the training cell below overwrites its weights with a saved checkpoint
model = create_model()
# NOTE(review): `use_pre_trained` is not read anywhere in the visible notebook.
use_pre_trained = True

In [None]:
# Fetch one (image, target) pair to check that the dataset pipeline runs
train_dataset[3]

In [None]:
# image = Image.open("../input/vinbig1024stratified/output/dataset/train/0007d316f756b3fa0baea2ff514ce945.png")
# image

In [None]:
# details = df_train[df_train['image_id'] == '0007d316f756b3fa0baea2ff514ce945']
# details

In [None]:
# from PIL import Image
# from PIL import ImageDraw
# draw = ImageDraw.Draw(image)
# for i in range(len(details)):
#     draw.rectangle([(details[i:i+1]['x_min'], details[i:i+1]['y_min']), (details[i:i+1]['x_max'], details[i:i+1]['y_max'])], outline ="red", width=3)
# image

In [None]:
# Train the model
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

# Hyperparameters
learning_rate = 0.0001
weight_decay = 0.0001
num_epochs = 25

params = [p for p in model.parameters() if p.requires_grad]

optimizer = Adam(params, lr=learning_rate, weight_decay=weight_decay)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.00002)

checkpoint = torch.load("../input/model14/model_14.bin")
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_epoch = checkpoint['epoch']
model.train()
print('Epoch = ', last_epoch)
# Create a folder for saving the model weights
%mkdir model
%mkdir stats

train_meters = []
coco_evals = []
t_start = time.time()

for epoch in range(last_epoch+1, num_epochs):
    # Train over the epoch
    logger = train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=100)
    train_meters.append(logger.meters)
    
    # Evaluate on the validation set
    coco_eval = evaluate(model, data_loader_val, device)
    coco_evals.append(coco_eval.coco_eval)
    
    # Update the learning rate
    lr_scheduler.step()
    
    # Save the model weights at every epoch
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
        }, f"model/model_{epoch}.bin")
    
    # Save the stats at every epoch from epoch 10
    if epoch > 15:
        pickle.dump(train_meters, open(f"stats/train_stats_{epoch}.pkl", 'wb'))
        pickle.dump(coco_evals, open(f"stats/val_stats_{epoch}.pkl", 'wb'))

print('TRAINING AND VALIDATION DONE')
t_end = time.time()
print(f"Training took {t_end - t_start}")

t_start = time.time()
# Validation statistics for every class at the end
coco_evals_class = []
for class_id in range(0, 15):
    class_val_dataset = VinBigDataset(val_img_dir, df_val, create_transforms(train=False), class_id=class_id)

    data_loader_class_val = DataLoader(class_val_dataset, batch_size=5, shuffle=False, num_workers=4, collate_fn=utils.collate_fn)
    
    if len(class_val_dataset) == 0:
        continue
        
    coco_eval_class = evaluate(model, data_loader_class_val, device)
    coco_evals_class.append(coco_eval_class.coco_eval)

t_end = time.time()
print(f"Validation for every class took {t_end - t_start}")

# Save the model weights
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
            }, f"model/model.bin")

# Save the stats
pickle.dump(train_meters, open(f"stats/train_stats.pkl", 'wb'))
pickle.dump(coco_evals, open(f"stats/val_stats.pkl", 'wb'))
pickle.dump(coco_evals_class, open(f"stats/val_stats_classes.pkl", 'wb'))

In [None]:
%%bash 
# Bundle the files in stats/ into a single archive (-r recursive, -q quiet)
zip -r -q stats.zip stats/*.*

# Plot statistics

In [None]:
# Functions for getting and plotting training stats

def get_train_stats(name, stats):
    """Return the ``.value`` of the meter called ``name`` from every entry of ``stats``."""
    values = []
    for meters in stats:
        values.append(meters[name].value)
    return values

def plot_train_stats(train_stats, name):
    """Plot the five training-loss curves of ``train_stats`` on one figure.

    ``name`` is appended to the figure title.
    """
    loss_names = (
        'loss',
        'loss_classifier',
        'loss_box_reg',
        'loss_objectness',
        'loss_rpn_box_reg',
    )
    # Collect every series before the figure is opened
    curves = {key: get_train_stats(key, train_stats) for key in loss_names}

    # Plot the training stats, one labelled line per loss
    plt.figure(figsize=(12,8))
    for key, values in curves.items():
        plt.plot(values, label=key)
    plt.legend()
    plt.xticks(range(0, len(curves['loss']), 5))
    plt.xlabel('Epoch')
    plt.title(f'Training stats {name}')

In [None]:
# Functions for getting and plotting validation stats

def get_val_stats(idx, stats):
    """Return entry ``idx`` of the ``'bbox'`` evaluator's ``stats`` for every element of ``stats``."""
    picked = []
    for evaluators in stats:
        picked.append(evaluators['bbox'].stats[idx])
    return picked

def plot_val_stats(val_stats, name):
    """Plot four validation curves of ``val_stats`` on one figure.

    The curves are entries 0, 1, 2 and 8 of each evaluation's bbox stats.
    ``name`` is appended to the figure title.
    """
    # (index into the bbox stats, legend label)
    selected = (
        (0, 'AP .50:.95'),
        (1, 'AP .50'),
        (2, 'AP .75'),
        (8, 'AR All Max Dets 100'),
    )
    # Collect every series before the figure is opened
    curves = [(label, get_val_stats(position, val_stats)) for position, label in selected]

    # Plot the validation stats
    plt.figure(figsize=(12,8))
    for label, values in curves:
        plt.plot(values, label=label)
    plt.legend()
    # Tick spacing follows the length of the 'AP .50' series
    plt.xticks(range(0, len(curves[1][1]), 5))
    plt.xlabel('Epoch')
    plt.title(f'Validation stats {name}')

In [None]:
# Plot the training losses and the validation curves collected by the training cell
plot_train_stats(train_meters, 'baseline')
plot_val_stats(coco_evals, 'baseline')

In [None]:
# for i in range(15):
#     plot_val_stats(coco_evals_class[i], f'baseline class {i}')

In [None]:
# model.eval()
# for images, targets in metric_logger.log_every(data_loader_train, 10, header):
#     images = list(image.to(device) for image in images)
#     targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#     outs = model(images)
#     break
# print(outs)
# print(targets)

# Draw prediction

In [None]:
# Train the model
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# model.to(device)

# Hyperparameters
# learning_rate = 0.0001
# weight_decay = 0
# num_epochs = 25
# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = Adam(params, lr=learning_rate, weight_decay=weight_decay)
# lr_scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.00002)

# checkpoint = torch.load("../input/model24/model_24.bin")
# model.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# last_epoch = checkpoint['epoch']
# model.eval()
# if torch.cuda.is_available():
#     model.cuda()

In [None]:
loaded_model = create_model()
checkpoint = torch.load("../input/model14/model_14.bin")
loaded_model.load_state_dict(checkpoint['model_state_dict'])

%mkdir image_predictions

font = ImageFont.truetype('../input/arial-font/arial.ttf', 16)

# for idx in range(10):
idx = 4
img, _ = val_dataset[idx]
label_boxes = np.array(val_dataset[idx][1]["boxes"])
#put the model in evaluation mode
loaded_model.eval()
with torch.no_grad():
    prediction = loaded_model([img])
image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(image, "RGBA")

# draw groundtruth
for elem in range(len(label_boxes)):
    draw.rectangle([(label_boxes[elem][0], label_boxes[elem][1]), (label_boxes[elem][2], label_boxes[elem][3])], outline=(78, 186, 74), width=5)
#     draw.text((label_boxes[elem][0], label_boxes[elem][1]-20), text=class_dictionary[val_dataset[idx][1]["labels"][elem].item()], font=font)

for element in range(len(prediction[0]["boxes"])):
    boxes = prediction[0]["boxes"][element].cpu().numpy()
    score = np.round(prediction[0]["scores"][element].cpu().numpy(), decimals= 3)
    if score > 0.4:
        draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])], outline=(235, 64, 52), width=3)
#         if (boxes[1] + 100 < boxesp[1])
        score = "{:.3f}".format(score)
        text = f'{class_dictionary[prediction[0]["labels"][element].item()]} {score}'
        size_width, _ = draw.textsize(text, font)
        draw.rectangle([(boxes[0], boxes[1]-20), (boxes[0] + size_width, boxes[1])], fill=(235, 64, 52, 127))
        draw.text((boxes[0], boxes[1]-20), text=text, font=font)

# image_name = df_val.at[idx, 'image_id']
# image.save(f"image_predictions/{image_name}.jpg")
image

In [None]:
idx = 5
# Fetch the sample once: every dataset access re-reads and re-transforms the image
img, gt_target = val_dataset[idx]
label_boxes = np.array(gt_target["boxes"])
# Put the model in evaluation mode
loaded_model.eval()
with torch.no_grad():
    prediction = loaded_model([img])
# Scale back to 0-255 and move the channel axis last so PIL can read the array
image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(image, "RGBA")

# Draw the ground-truth boxes in green
for elem in range(len(label_boxes)):
    draw.rectangle([(label_boxes[elem][0], label_boxes[elem][1]), (label_boxes[elem][2], label_boxes[elem][3])], outline=(78, 186, 74), width=5)
#     draw.text((label_boxes[elem][0], label_boxes[elem][1]-20), text=class_dictionary[val_dataset[idx][1]["labels"][elem].item()], font=font)

# Draw every predicted box scoring above 0.4 in red, captioned with class and score
for element in range(len(prediction[0]["boxes"])):
    boxes = prediction[0]["boxes"][element].cpu().numpy()
    score = np.round(prediction[0]["scores"][element].cpu().numpy(), decimals= 3)
    if score > 0.4:
        draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])], outline=(235, 64, 52), width=3)
        score = "{:.3f}".format(score)
        text = f'{class_dictionary[prediction[0]["labels"][element].item()]} {score}'
        # ImageDraw.textsize was removed in Pillow 10; use textlength when it
        # exists and fall back to textsize on older Pillow versions.
        if hasattr(draw, "textlength"):
            size_width = draw.textlength(text, font=font)
        else:
            size_width = draw.textsize(text, font)[0]
        # Semi-transparent background strip just above the box for the caption
        draw.rectangle([(boxes[0], boxes[1]-20), (boxes[0] + size_width, boxes[1])], fill=(235, 64, 52, 127))
        draw.text((boxes[0], boxes[1]-20), text=text, font=font)

# image_name = df_val.at[idx, 'image_id']
# image.save(f"image_predictions/{image_name}.jpg")
image

In [None]:
idx = 6
# Fetch the sample once: every dataset access re-reads and re-transforms the image
img, gt_target = val_dataset[idx]
label_boxes = np.array(gt_target["boxes"])
# Put the model in evaluation mode
loaded_model.eval()
with torch.no_grad():
    prediction = loaded_model([img])
# Scale back to 0-255 and move the channel axis last so PIL can read the array
image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(image, "RGBA")

# Draw the ground-truth boxes in green
for elem in range(len(label_boxes)):
    draw.rectangle([(label_boxes[elem][0], label_boxes[elem][1]), (label_boxes[elem][2], label_boxes[elem][3])], outline=(78, 186, 74), width=5)
#     draw.text((label_boxes[elem][0], label_boxes[elem][1]-20), text=class_dictionary[val_dataset[idx][1]["labels"][elem].item()], font=font)

# Draw every predicted box scoring above 0.4 in red, captioned with class and score
for element in range(len(prediction[0]["boxes"])):
    boxes = prediction[0]["boxes"][element].cpu().numpy()
    score = np.round(prediction[0]["scores"][element].cpu().numpy(), decimals= 3)
    if score > 0.4:
        draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])], outline=(235, 64, 52), width=3)
        score = "{:.3f}".format(score)
        text = f'{class_dictionary[prediction[0]["labels"][element].item()]} {score}'
        # ImageDraw.textsize was removed in Pillow 10; use textlength when it
        # exists and fall back to textsize on older Pillow versions.
        if hasattr(draw, "textlength"):
            size_width = draw.textlength(text, font=font)
        else:
            size_width = draw.textsize(text, font)[0]
        # Semi-transparent background strip just above the box for the caption
        draw.rectangle([(boxes[0], boxes[1]-20), (boxes[0] + size_width, boxes[1])], fill=(235, 64, 52, 127))
        draw.text((boxes[0], boxes[1]-20), text=text, font=font)

# image_name = df_val.at[idx, 'image_id']
# image.save(f"image_predictions/{image_name}.jpg")
image

In [None]:
idx = 7
# Fetch the sample once: every dataset access re-reads and re-transforms the image
img, gt_target = val_dataset[idx]
label_boxes = np.array(gt_target["boxes"])
# Put the model in evaluation mode
loaded_model.eval()
with torch.no_grad():
    prediction = loaded_model([img])
# Scale back to 0-255 and move the channel axis last so PIL can read the array
image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(image, "RGBA")

# Draw the ground-truth boxes in green
for elem in range(len(label_boxes)):
    draw.rectangle([(label_boxes[elem][0], label_boxes[elem][1]), (label_boxes[elem][2], label_boxes[elem][3])], outline=(78, 186, 74), width=5)
#     draw.text((label_boxes[elem][0], label_boxes[elem][1]-20), text=class_dictionary[val_dataset[idx][1]["labels"][elem].item()], font=font)

# Draw every predicted box scoring above 0.4 in red, captioned with class and score
for element in range(len(prediction[0]["boxes"])):
    boxes = prediction[0]["boxes"][element].cpu().numpy()
    score = np.round(prediction[0]["scores"][element].cpu().numpy(), decimals= 3)
    if score > 0.4:
        draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])], outline=(235, 64, 52), width=3)
        score = "{:.3f}".format(score)
        text = f'{class_dictionary[prediction[0]["labels"][element].item()]} {score}'
        # ImageDraw.textsize was removed in Pillow 10; use textlength when it
        # exists and fall back to textsize on older Pillow versions.
        if hasattr(draw, "textlength"):
            size_width = draw.textlength(text, font=font)
        else:
            size_width = draw.textsize(text, font)[0]
        # Semi-transparent background strip just above the box for the caption
        draw.rectangle([(boxes[0], boxes[1]-20), (boxes[0] + size_width, boxes[1])], fill=(235, 64, 52, 127))
        draw.text((boxes[0], boxes[1]-20), text=text, font=font)

# image_name = df_val.at[idx, 'image_id']
# image.save(f"image_predictions/{image_name}.jpg")
image

In [None]:
idx = 8
# Fetch the sample once: every dataset access re-reads and re-transforms the image
img, gt_target = val_dataset[idx]
label_boxes = np.array(gt_target["boxes"])
# Put the model in evaluation mode
loaded_model.eval()
with torch.no_grad():
    prediction = loaded_model([img])
# Scale back to 0-255 and move the channel axis last so PIL can read the array
image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(image, "RGBA")

# Draw the ground-truth boxes in green
for elem in range(len(label_boxes)):
    draw.rectangle([(label_boxes[elem][0], label_boxes[elem][1]), (label_boxes[elem][2], label_boxes[elem][3])], outline=(78, 186, 74), width=5)
#     draw.text((label_boxes[elem][0], label_boxes[elem][1]-20), text=class_dictionary[val_dataset[idx][1]["labels"][elem].item()], font=font)

# Draw every predicted box scoring above 0.4 in red, captioned with class and score
for element in range(len(prediction[0]["boxes"])):
    boxes = prediction[0]["boxes"][element].cpu().numpy()
    score = np.round(prediction[0]["scores"][element].cpu().numpy(), decimals= 3)
    if score > 0.4:
        draw.rectangle([(boxes[0], boxes[1]), (boxes[2], boxes[3])], outline=(235, 64, 52), width=3)
        score = "{:.3f}".format(score)
        text = f'{class_dictionary[prediction[0]["labels"][element].item()]} {score}'
        # ImageDraw.textsize was removed in Pillow 10; use textlength when it
        # exists and fall back to textsize on older Pillow versions.
        if hasattr(draw, "textlength"):
            size_width = draw.textlength(text, font=font)
        else:
            size_width = draw.textsize(text, font)[0]
        # Semi-transparent background strip just above the box for the caption
        draw.rectangle([(boxes[0], boxes[1]-20), (boxes[0] + size_width, boxes[1])], fill=(235, 64, 52, 127))
        draw.text((boxes[0], boxes[1]-20), text=text, font=font)

# image_name = df_val.at[idx, 'image_id']
# image.save(f"image_predictions/{image_name}.jpg")
image