# Computer Aided Diagnosis: Diagnosis in dermoscopic images
## Valerio Di Sano, Zafar Toshpulatov, Antoine Merlet

![alt text](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQqOdINfzZ4LQu82vL_1PYgMLL8jISvCGTF5fY71zMr01weZ7gGdQ "UdG")


### Disclaimer: This Notebook aims to demonstrate our results on Skin Lesion Classification using Deep Learning.
### All data (Software, Model weights, Packages) should be installed and organized as stated at the end of this file.


## 1. Imports

In [0]:
import argparse
import os
import shutil
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from tensorboardX import SummaryWriter

# "Global" holding the best validation accuracy reached so far. It is
# overwritten when a checkpoint is loaded and updated by the training loop.
best_acc = 0
# tensorboardX writer used by train() and validate() to log their scalars.
# NOTE(review): 'runs' is presumably the log directory - confirm against the
# installed tensorboardX version.
writer = SummaryWriter('runs')

## 2. Parameters

In [0]:
data_path = './'  # Root folder of the dataset (expects 'train', 'val' and 'test' sub-folders)
nb_classes = 2    # Number of classes

# Network
arch = 'vgg16_bn' # Architecture of the CNN. Use 'vgg16_bn', 'ResNet50' or 'GoogLeNet'
epochs = 300      # Number of epochs
# Raw string: the Windows path contains backslashes that must not be read as escape sequences
checkpoint = r'D:\ProjectSkin\chall1\checkpoint.pth.tar'   # Checkpoint to resume from (empty string to disable)
start_epoch = 0   # Starting epoch (overwritten by the value stored in the checkpoint, if loaded)
batch_size = 28   # Number of images fed to the CNN at once. Change depending on GPU RAM

# Optimizer setting
optimizer = 'adam'    # Choice of the optimizer: 'SGD' or 'adam'
learning_rate = 1e-5 # Initial optimizer Learning Rate (LR)
lr_decay_fact = 0.1  # Multiplier applied to the learning rate at each decay step
lr_decay_time = 30    # Number of epochs between two learning rate decays
momentum_SGD = 0.9   # Momentum for SGD
weight_d = 1e-5      # Weight decay (L2 penalty)

# Run type
evaluate = True  # Set to True to skip training and evaluate only
pretrained = True # Set to True to load PyTorch pretrained weights (ImageNet)



## 3. Tool functions

In [0]:
# --------------------------- Compile without reading is fine ----------------------------
class AverageMeter(object):
    """Running statistics of a scalar: last value, weighted sum, count and mean."""

    def __init__(self):
        # A fresh meter starts from the zeroed state
        self.reset()

    def reset(self):
        """Zero every statistic held by the meter."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted ``n`` times in the mean."""
        self.val = val
        self.sum += n * val if False else val * n
        self.count += n
        # Mean over everything recorded since the last reset
        self.avg = self.sum / self.count
        
def adjust_learning_rate(optimizer, epoch):
    """Apply the step-decayed learning rate for ``epoch`` to every parameter group.

    The rate is the module-level ``learning_rate`` multiplied by
    ``lr_decay_fact`` once for each completed block of ``lr_decay_time`` epochs.
    """
    completed_decays = epoch // lr_decay_time
    decayed_lr = learning_rate * lr_decay_fact ** completed_decays
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr
        
def accuracy(output, target, topk=(1,)):
    """Compute the precision@k, in percent, for each requested value of k.

    Args:
        output: score tensor indexed as (sample, class); classes are ranked
            along dimension 1.
        target: tensor of ground-truth class indices, one per sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 0-dim tensor per entry of ``topk``, holding the
        percentage of samples whose target is among the k best-scored classes.
    """
    batch_size = target.size(0)
    # Rank as many classes as the largest requested k, but never more than exist
    maxk = min(max(topk), output.size(1))

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()  # one row per rank, one column per sample
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape (not view): the slice of a transposed tensor may be non-contiguous
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): # TODO export in folder with all param
    """Write ``state`` to ``filename``; also copy it to 'best_model.pth.tar' when ``is_best``."""
    torch.save(state, filename)
    if not is_best:
        return
    # New best model: keep a separate copy of its weights
    shutil.copyfile(filename, 'best_model.pth.tar')


## 4. Train function

In [0]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one epoch over ``train_loader``.

    Args:
        train_loader: iterable yielding (images, target) batches.
        model: network to optimize; it is switched to train mode.
        criterion: loss function called as ``criterion(output, target)``.
        optimizer: optimizer stepping the parameters of ``model``.
        epoch: index of the current epoch, used for console output and as
            the base of the TensorBoard iteration counter.

    Every 10 batches the running statistics are printed and the latest loss
    and accuracy are written to the module-level ``writer``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # `async` is a reserved keyword since Python 3.7; PyTorch 0.4 renamed
        # the argument to `non_blocking`
        target = target.cuda(non_blocking=True)

        # compute gradient and do optimizer step
        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
            # compute output
            output = model(images)
            loss = criterion(output, target)

            loss.backward()
            optimizer.step()

        # measure accuracy and record loss as plain Python floats
        # (indexing a 0-dim tensor with [0] is an error; use .item())
        prec1 = accuracy(output.detach(), target, topk=(1,))
        losses.update(loss.item(), images.size(0))
        acc.update(prec1[0].item(), images.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})\t'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, acc=acc))
            # global iteration index across epochs
            niter = epoch*len(train_loader)+i
            writer.add_scalar('Train/Loss', losses.val, niter)
            writer.add_scalar('Train/Accuracy', acc.val, niter)


## 5. Validate Function

In [0]:
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and return the average accuracy.

    Args:
        val_loader: iterable yielding (images, target) batches.
        model: network to evaluate; it is switched to eval mode.
        criterion: loss function called as ``criterion(output, target)``.
        epoch: step index under which the averages are logged to the
            module-level ``writer``.

    Returns:
        The top-1 accuracy (in percent) averaged over all samples, as a float.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # no gradients are needed for evaluation
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # `async` is a reserved keyword since Python 3.7; PyTorch 0.4
            # renamed the argument to `non_blocking`
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss as plain Python floats
            # (indexing a 0-dim tensor with [0] is an error; use .item())
            prec1 = accuracy(output, target, topk=(1,))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1[0].item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % 10 == 0:
                print('Validation: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuracy {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1))
    print(' * Accuracy {top1.avg:.3f}'
          .format(top1=top1))
    writer.add_scalar('Validation/Loss', losses.avg, epoch)
    writer.add_scalar('Validation/Accuracy', top1.avg, epoch)

    return top1.avg

## Test Function

In [0]:
def test_all(tets_loader, model, criterion, epoch):
    # switch to evaluate mode
    model.eval()
    out = np.zeros(len(tets_loader.dataset.imgs))
    end = time.time()
    for i, (input,target) in enumerate(tets_loader):
        with torch.set_grad_enabled(False):
            input_var = torch.autograd.Variable(input)
    
            # compute output
            output = model(input_var)
            output = output.data.cpu().numpy()[0]
            out[i] = output.argmax(axis=0)
    return out

## 6. Initialize network

In [0]:
# Load the requested architecture, resize its classification head to
# nb_classes outputs and transfer it to the GPU.
if arch == 'vgg16_bn':
    print("Using VGG16_bn (batch normalization)")
    model = models.vgg16_bn(pretrained=pretrained)  # Honor the `pretrained` parameter (downloads weights if needed)
    num_ftrs = model.classifier[6].in_features  # Number of inputs of the last layer
    model.classifier[6] = nn.Linear(num_ftrs, nb_classes)   # Reset last layer weights, change number of outputs
    model.features = torch.nn.DataParallel(model.features) # Needed for local processing
    model.cuda() # Transfer model to GPU
elif arch == 'ResNet50':
    # The torchvision builder is lower-case `resnet50`
    model = models.resnet50(pretrained=pretrained)
    model.fc = nn.Linear(model.fc.in_features, nb_classes)  # Reset last layer, change number of outputs
    # Keep the wrapped module: DataParallel returns a new object
    model = torch.nn.DataParallel(model).cuda()
elif arch == 'GoogLeNet':
    # The builder is lower-case `googlenet`; auxiliary heads are disabled so
    # the forward pass returns a single tensor in train mode.
    # NOTE(review): googlenet is only shipped by more recent torchvision
    # releases than the one paired with PyTorch 0.4.0 - confirm the installed version.
    model = models.googlenet(pretrained=pretrained, aux_logits=False)
    model.fc = nn.Linear(model.fc.in_features, nb_classes)  # Reset last layer, change number of outputs
    model = torch.nn.DataParallel(model).cuda()
else:
    # Stop here: continuing would only fail later with an undefined `model`
    raise ValueError(
        "Unrecognized architecture '{}'. Use 'vgg16_bn', 'ResNet50' or 'GoogLeNet'.".format(arch))

Using VGG16_bn (batch normalization)


In [0]:
# Define working tools
# `optimizer` starts as the name chosen in the parameters and is replaced by
# the optimizer object. The isinstance guard lets this cell be re-run safely.
if isinstance(optimizer, str):
    optimizer_name = optimizer.lower()  # accept 'SGD'/'sgd' and 'Adam'/'adam' alike
    if optimizer_name == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum_SGD, weight_decay=weight_d)
    elif optimizer_name == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=weight_d)
    else:
        # Fail early instead of carrying a string into the training loop
        raise ValueError("Unrecognized optimizer '{}'. Use 'SGD' or 'adam'.".format(optimizer))

# Classification loss, moved to the GPU
criterion = nn.CrossEntropyLoss().cuda()

# Let cuDNN benchmark and select its algorithms
cudnn.benchmark = True

In [0]:
# Resume from a previously saved training checkpoint, when one is configured
if checkpoint and os.path.isfile(checkpoint):
    data = torch.load(checkpoint)   # read the checkpoint file
    start_epoch = data['epoch']     # epoch to resume from
    best_acc = data['best_acc']     # best accuracy reached before the save
    model.load_state_dict(data['state_dict'])    # restore network weights
    optimizer.load_state_dict(data['optimizer']) # restore optimizer state
    print("Loaded checkpoint '{}' (epoch {})".format(checkpoint, start_epoch))
elif checkpoint:
    # A path was configured but nothing exists there
    print("No checkpoint at '{}'".format(checkpoint))

Loaded checkpoint 'D:\ProjectSkin\chall1\checkpoint.pth.tar' (epoch 112)


## 7. Data Loading

In [0]:
train_dir = os.path.join(data_path, 'train') # Get train data folder

train_dataset = datasets.ImageFolder( # Prepare training data
    train_dir,
    transforms.Compose([
        # RandomSizedCrop is the deprecated name of RandomResizedCrop
        transforms.RandomResizedCrop(224), # randomly crop images to fit ImageNet input size
        transforms.RandomHorizontalFlip(), # data augmentation
        transforms.ToTensor(),
        transforms.Normalize( # Setup normalization according to ImageNet
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]),
    ]))

train_loader = torch.utils.data.DataLoader( # Define loading scheme
    train_dataset,
    batch_size=batch_size, # Number of images per batch
    shuffle=True,          # Shuffle data order on each epoch
    pin_memory=True)       # Use CUDA pinned memory for tensors

In [0]:
val_dir = os.path.join(data_path, 'val') # Get validation data folder

val_dataset = datasets.ImageFolder( # Prepare validation data
    val_dir,
    transforms.Compose([
        # Scale is the deprecated name of Resize
        transforms.Resize(256),
        transforms.CenterCrop(224), # Crop to fit input size. ROI assumed at center
        transforms.ToTensor(),
        transforms.Normalize( # Setup normalization according to ImageNet
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]),
    ]))

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False, # Explicitly reminded: do not shuffle for validation (for stats)
    pin_memory=True)

In [0]:
test_dir = os.path.join(data_path,'test') # Get test data folder

test_dataset = datasets.ImageFolder( # Prepare test data
    test_dir,
    transforms.Compose([
        # Scale is the deprecated name of Resize
        transforms.Resize(256),
        transforms.CenterCrop(224), # Crop to fit input size. ROI assumed at center
        transforms.ToTensor(),
        transforms.Normalize( # Setup normalization according to ImageNet
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]),
    ]))

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False, # Do not shuffle: predictions must follow the dataset order
    pin_memory=True)

## 8. Train the network

In [0]:
# --------- Evaluate only -------------
if evaluate:
    # Run a single validation pass on the validation loader
    # (the `evaluate` flag itself is not a data loader); logged under step 1
    validate(val_loader, model, criterion, 1)
# -------------------------------------

In [0]:
# Training is skipped in evaluate-only mode, as documented on the `evaluate` parameter
if not evaluate:
    for epoch in range(start_epoch, epochs):

        # Apply the step-decayed learning rate, train one epoch, then validate
        adjust_learning_rate(optimizer, epoch)
        train(train_loader, model, criterion, optimizer, epoch)
        acc = validate(val_loader, model, criterion, epoch)

        is_best = acc > best_acc # is new accuracy global best
        best_acc = max(acc, best_acc) # update best accuracy if needed

        # Save everything needed to resume; 'epoch' is the next epoch to run
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': arch,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best)

## TESTING

In [0]:
# --------- Predict the test set and export the predictions -------------
result = test_all(
    test_loader,
    model,
    criterion,
    1,
)
# One predicted class index per line
np.savetxt("test_1.csv", result, delimiter=",")

In [0]:
# Number of test samples predicted as class 0
np.sum(result == 0)

344

# Requirements

#### Software and packages:
* Python 3.6
* CUDA 9.0
* cuDNN 7.4.1

* Pytorch 0.4.0
* Torchvision 

#### Model weights:
If not given with this Notebook, please download them (1.10GB) here: https://drive.google.com/open?id=14mzlsTjZf4p-ihovbOTjyG_r1dZ2-alb   (not uploaded yet)