# Adversarial Attack and Detection in Medical Images using Deep Learning

## Imports
Import common libraries.

In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

## Load non-attacked and attacked images

#### Settings

In [2]:
ATT_LOAD_PARTITION_ROOT = './attacked_images_case_2/'      # Root path to load images from (passed as image_root)
ATT_LOAD_PARTITION = './attacked_images_case_2/2021-11-25' # Path to load .csv files from (prefix of every csv_path)
BATCH_SIZE = 16                                            # Batch size to use in every DataLoader
NUM_WORKERS = 1                                            # Number of DataLoader workers to use
IMAGE_SIZE = 400                                           # Images are resized to IMAGE_SIZE x IMAGE_SIZE

#### Load images

In [3]:
import albumentations as album
from albumentations.pytorch import ToTensorV2
from data import LoadDatasetFromCSV
from torch.utils.data import DataLoader, ConcatDataset



def concat_datasets(dataset, dataset_attacked):
    """
    Interleave non-attacked and attacked samples into one list of (image, label) tuples.

    Both datasets are walked in lockstep. For every pair, the non-attacked image is emitted first and the
    attacked image second, and both carry the label of the non-attacked sample; the label stored with the
    attacked sample is ignored. Iteration stops at the end of the shorter dataset.
    """
    paired_samples = []
    for clean_sample, attacked_sample in zip(dataset, dataset_attacked):
        clean_image = clean_sample[0]
        attacked_image = attacked_sample[0]
        label = clean_sample[1]
        # Both images get the true label, since the retrained model should classify attacked images to it
        paired_samples.extend([(clean_image, label), (attacked_image, label)])
    return paired_samples
        


# Pre-computed mean and std (three values each, presumably one per colour channel), applied after the images have
# been rescaled to [0,1] below
data_mean = torch.tensor([0.7750, 0.5888, 0.7629])
data_std = torch.tensor([0.2129, 0.2971, 0.1774])

# Resize images and rescale values
# The statistics are handed to albumentations as plain Python lists: its Normalize is documented to take floats or
# sequences of floats, so this avoids relying on an implicit torch.Tensor conversion inside the library
album_compose = album.Compose([
    album.Resize(IMAGE_SIZE, IMAGE_SIZE),                                            # Resize to IMAGE_SIZE x IMAGE_SIZE
    album.Normalize(mean=[0.0,0.0,0.0], std=[1.0,1.0,1.0], max_pixel_value=255.0),   # Rescale values from [0,255] to [0,1]
    album.Normalize(mean=data_mean.tolist(), std=data_std.tolist(), max_pixel_value=1.0), # Standardize with the above
    ToTensorV2(),                                                                    # Convert to a torch tensor
])



def _load_split(csv_suffix):
    """Create a LoadDatasetFromCSV for the .csv file at ATT_LOAD_PARTITION + csv_suffix with the shared transforms."""
    return LoadDatasetFromCSV(image_root=ATT_LOAD_PARTITION_ROOT,
                              csv_path=ATT_LOAD_PARTITION + csv_suffix,
                              transforms=album_compose)


# Attacked and original images, each with a training and a test split
dataset_attacks_train   = _load_split('/attacked_images/data_labels_train.csv')
dataset_attacks_test    = _load_split('/attacked_images/data_labels_test.csv')
dataset_originals_train = _load_split('/original_images/data_labels_train.csv')
dataset_originals_test  = _load_split('/original_images/data_labels_test.csv')

# Interleave non-attacked and attacked images per split, using the true (non-attacked) label for both images
# These concatenated sets are used for re-training the model later
dataset_train = concat_datasets(dataset_originals_train, dataset_attacks_train)
dataset_test  = concat_datasets(dataset_originals_test,  dataset_attacks_test)



def _make_loader(dataset, shuffle=False):
    """Wrap a dataset in a DataLoader using the notebook-wide BATCH_SIZE and NUM_WORKERS."""
    return DataLoader(dataset=dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=shuffle)


# Load data into loaders (attack sets have label to what they are predicted to)
# These loaders are only used for evaluation, so the sample order is kept fixed
dataloader_attacks_train   = _make_loader(dataset_attacks_train)
dataloader_attacks_test    = _make_loader(dataset_attacks_test)
dataloader_originals_train = _make_loader(dataset_originals_train)
dataloader_originals_test  = _make_loader(dataset_originals_test)

# Create dataloaders for train and test data
# The training loader also feeds the retraining loop, so it is reshuffled every epoch; without shuffling every epoch
# would see exactly the same batches in the same order
dataloader_train           = _make_loader(dataset_train, shuffle=True)
dataloader_test            = _make_loader(dataset_test)

## Load classifier model and classify non-attacked and attacked images

#### Settings

In [4]:
MODEL_NAME = 'efficientnet-b0'                                    # Architecture name (not referenced in the cells shown here)
INIT_LR = 0.01                                                    # Learning rate of the Adam optimizer created for the loaded model
LOAD_PATH = './checkpoints/2021-11-05_400x400_100/models/last.pt' # Path to load the trained model from

#### Load and classify non-attacked and attacked images

In [5]:
def test_dataloader(dataloader, model, device, loss_function, optimizer):
    """
    Run classify.test on a dataloader and print the resulting accuracy as a percentage.

    All arguments are forwarded unchanged to classify.test. Of its five return values only the first one
    (the accuracy) is used; the others are discarded. Nothing is returned.
    """
    accuracy, _loss, _wrong_images, _wrong_labels, _right_labels = classify.test(
        dataloader, model, device, loss_function, optimizer)
    # Accuracy is converted to percent and rounded to three decimals before printing
    percent = np.around(100*accuracy, 3)
    print('  Accuracy:   ' + str(percent) + '%')

    
    
# Select the device first so the checkpoint can be mapped onto it while loading
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load model and setup
# map_location lets a checkpoint that was saved on GPU be loaded on a CPU-only machine as well
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code - only load trusted checkpoints
model = torch.load(LOAD_PATH, map_location=device)
model.to(device)
loss_function = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(lr=INIT_LR, params=model.parameters(), betas=(0.9, 0.99))
print('Loaded model:', LOAD_PATH)

Loaded model: ./checkpoints/2021-11-05_400x400_100/models/last.pt


In [34]:
import classify



# Classify all data sets with the originally trained model
# Each entry pairs the heading to print with the loader to evaluate
_evaluation_sets = [
    ('\nAttacks train',           dataloader_attacks_train),
    ('\nAttacks test',            dataloader_attacks_test),
    ('\nNon-attacks train',       dataloader_originals_train),
    ('\nNon-attacks test',        dataloader_originals_test),
    ('\nConcatenated train data', dataloader_train),
    ('\nConcatenated test data',  dataloader_test),
]
for _heading, _loader in _evaluation_sets:
    print(_heading)
    test_dataloader(_loader, model, device, loss_function, optimizer)

Loaded model: ./checkpoints/2021-11-05_400x400_100/models/last.pt

Attacks train
  Accuracy:   100.0%

Attacks test
  Accuracy:   100.0%

Non-attacks train
  Accuracy:   95.699%

Non-attacks test
  Accuracy:   97.5%

Concatenated train data
  Accuracy:   47.849%

Concatenated test data
  Accuracy:   48.75%


## Retrain model

#### Settings

In [6]:
LOAD_MODEL_RETRAIN = False                                               # True: load model from LOAD_PATH_RETRAIN, False: retrain

MODEL_NAME_RETRAIN = 'efficientnet-b0'                                   # Architecture name (not referenced in the cells shown here)
NUM_CLASSES_RETRAIN = 3                                                  # Number of classes (not referenced in the cells shown here)
EPOCHS_RETRAIN = 10                                                      # Number of retraining epochs, also T_max of the LR schedule
INIT_LR_RETRAIN = 0.01                                                   # Initial learning rate of the Adam optimizer
LOAD_PATH_RETRAIN = './checkpoints/2021-12-02_robustness/models/last.pt' # Model loaded when LOAD_MODEL_RETRAIN is True
SAVE_PATH_RETRAIN = './checkpoints/2021-12-02_robustness'                # Passed to create_save_path for the log and model paths

#### Retrain model

In [7]:
import time
from datetime import datetime, timedelta
from utils import create_save_path
import classify
from models.efficientnet import EfficientNet
from torch.utils.tensorboard import SummaryWriter
import copy



# Initialize value for keeping track of "best" model with highest validation accuracy
best_monitor_value = 0

# Device and loss function are identical for both branches below, so they are set up once
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
loss_function = torch.nn.CrossEntropyLoss().to(device)



# Load trained model
if LOAD_MODEL_RETRAIN:

    # Setup
    # map_location places the checkpoint on the selected device while loading, so a model that was saved on GPU
    # can also be loaded on a CPU-only machine
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary code - only load trusted checkpoints
    retrained_model = torch.load(LOAD_PATH_RETRAIN, map_location=device)
    retrained_model.to(device)
    optimizer = torch.optim.Adam(lr=INIT_LR_RETRAIN, params=retrained_model.parameters(), betas=(0.9, 0.99))

    # Print information
    print('Loaded model:', LOAD_PATH_RETRAIN)



# Train model
else:

    # Setup model and optimizer; retraining starts from a copy so the originally loaded model stays untouched
    retrained_model = copy.deepcopy(model)
    retrained_model.to(device)
    optimizer = torch.optim.Adam(lr=INIT_LR_RETRAIN, params=retrained_model.parameters(), betas=(0.9, 0.99))
    optimizer_step = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS_RETRAIN, eta_min=1e-5)

    # Create path for saving training log and models
    train_log_path, models_path = create_save_path(SAVE_PATH_RETRAIN)

    # Initialize log writer
    # NOTE(review): per the SummaryWriter documentation `comment` has no effect when log_dir is given explicitly
    log_writer = SummaryWriter(train_log_path, comment=f'LR_{INIT_LR_RETRAIN}_BS_{BATCH_SIZE}')

    # Print starting time
    time_start = time.time()
    print('Started training:', str(datetime.now()))
    print('')

    # There is no validation set here; this counter is incremented every epoch and handed to classify.save_model
    # as monitor_value in place of a validation accuracy
    valid_acc = 0

    # The writer is closed even when training is interrupted, so everything logged so far is flushed to disk
    try:

        # Train for each epoch
        for epoch in range(EPOCHS_RETRAIN):

            # Print current epoch
            print('Epoch {:<3}'.format(epoch+1), end='   ')

            # Train model
            train_acc, train_loss = classify.train(dataloader_train, retrained_model, device, loss_function,
                                                   optimizer)
            print('Training accuracy: {:>3}%, loss: {:<5}'.format(
                np.around(100*train_acc, 2), np.around(train_loss, 5)), end='\n')

            # Record the learning rate this epoch was trained with, then advance the schedule; reading it after
            # the step would log the next epoch's learning rate instead
            lr = optimizer.param_groups[0]['lr']
            optimizer_step.step()

            # We do not have validation set here
            valid_acc = valid_acc + 1

            # Save model if better than previously best model as well as the last model
            best_monitor_value = classify.save_model(model=retrained_model, epoch=epoch,
                                                     best_monitor_value=best_monitor_value,
                                                     monitor_value=valid_acc, epochs=EPOCHS_RETRAIN,
                                                     models_path=models_path)

            # Write to tensorboard log
            log_writer.add_scalar("Train/Training accuracy",   train_acc,  epoch)
            log_writer.add_scalar("Train/Training loss",       train_loss, epoch)
            log_writer.add_scalar("Train/Learning rate",       lr,         epoch)

    finally:
        log_writer.close()

    # Print time elapsed
    time_elapsed = time.time() - time_start
    print('')
    print('Ended:', str(datetime.now()))
    print('Total training time: {}'.format(str(timedelta(seconds=time_elapsed))))

Started training: 2021-12-02 16:57:42.983307

Epoch 1     Training accuracy: 90.32%, loss: 0.51468   Saved current model as best model.
Epoch 2     Training accuracy: 94.62%, loss: 0.19948   Epoch 3     Training accuracy: 97.85%, loss: 0.06842   Epoch 4     Training accuracy: 98.39%, loss: 0.04012   Epoch 5     Training accuracy: 99.73%, loss: 0.0145   Epoch 6     Training accuracy: 100.0%, loss: 0.00572   Epoch 7     Training accuracy: 100.0%, loss: 0.00319   Epoch 8     Training accuracy: 100.0%, loss: 0.00575   Epoch 9     Training accuracy: 100.0%, loss: 0.00393   Epoch 10    Training accuracy: 100.0%, loss: 0.00413   Saved last model.

Ended: 2021-12-02 17:44:07.876009
Total training time: 0:46:24.892267


#### Try to classify all data sets again after retraining of model

In [9]:
# Classify all data sets again after retraining
# Each entry pairs the heading to print with the loader to evaluate on the retrained model
_evaluation_sets_retrained = [
    ('\nAttacks train',           dataloader_attacks_train),
    ('\nAttacks test',            dataloader_attacks_test),
    ('\nNon-attacks train',       dataloader_originals_train),
    ('\nNon-attacks test',        dataloader_originals_test),
    ('\nConcatenated train data', dataloader_train),
    ('\nConcatenated test data',  dataloader_test),
]
for _heading, _loader in _evaluation_sets_retrained:
    print(_heading)
    test_dataloader(_loader, retrained_model, device, loss_function, optimizer)


Attacks train
  Accuracy:   4.301%

Attacks test
  Accuracy:   6.25%

Non-attacks train
  Accuracy:   97.312%

Non-attacks test
  Accuracy:   95.0%

Concatenated train data
  Accuracy:   96.505%

Concatenated test data
  Accuracy:   94.375%
