In [1]:
import torch
from torch import nn
from torchvision import transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader
from datasets.mpr_dataset import MPR_Dataset,MPR_Dataset_H5, MPR_Dataset_New_Test, MPR_Dataset_LSTM
from tqdm import tqdm_notebook as tqdm
import yaml
import pandas as pd
import numpy as np
from models import ShuffleNetv2, AttentionShuffleNetV2
from sklearn.metrics import accuracy_score, f1_score
from ast import literal_eval
# from visualize_results import label_predictions_to_images
import os
import cv2
import copy
import matplotlib.pyplot as plt
from collections import OrderedDict
from torch.nn.functional import softmax 
from os.path import join

In [2]:
# Read the experiment configuration from the notebook's working directory.
# NOTE(review): yaml.safe_load would be enough if config.yaml only holds plain
# scalars / lists / dicts -- confirm before switching loaders.
p = '.'
config_path = p + '/config.yaml'
with open(config_path, 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

# Override whatever device the YAML file specifies.
config['device'] = 'cpu'
root_dir = config["data"]["root_dir"]

# Only tensor conversion is applied; normalisation is intentionally left disabled.
# transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
transform = transforms.Compose([transforms.ToTensor()])

# One sample per batch, in dataset order, so predictions line up row-by-row with
# the dataset's dataframe.
test_dataset = MPR_Dataset(root_dir, partition="test", config=config["data"], transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [3]:
# Prefer the GPU when one is available, otherwise fall back to the CPU so the
# notebook also runs on machines without CUDA.
# NOTE: config['device'] (set in the configuration cell) is not consulted here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ShuffleNetv2(n_classes=3)

# Checkpoints evaluated previously, kept for reference:
# PATH_WEIGHTS = 'best_model_shufflenetv2.pth'
# PATH_WEIGHTS = 'best_model_only_artery.pth'
# PATH_WEIGHTS = '/home/petryshak/CoronaryArteryPlaqueIdentification/experiments_replication/exp7/models/model_model_6_val_loss=1.138445.pth'
PATH_WEIGHTS = '/home/petryshak/CoronaryArteryPlaqueIdentification/experiments_replication/only_lad_label_smoothing/models/model_model_20_val_f1=0.6245107.pth'

# map_location moves tensors that were saved from a CUDA device onto `device`,
# which is required when the checkpoint is loaded on a CPU-only machine.
model.load_state_dict(torch.load(PATH_WEIGHTS, map_location=device))
# model.model.fc = nn.Sequential()
model.to(device)
# Inference mode (fixes dropout / batch-norm behaviour); the trailing semicolon
# keeps the notebook from printing the whole module repr.
model.eval();




In [4]:
predictions = []
probas = []
# Pure inference: gradients are never needed, so disable autograd for the whole loop.
with torch.no_grad():
    for step, (x, _) in enumerate(tqdm(test_loader)):
        output = model(x.to(device))
        softmax_output = softmax(output, dim=1)

        _, predicted = torch.max(softmax_output, 1)
        # The argmax indices are integers and can never be null, so the NaN check
        # has to look at the probabilities. `.any()` keeps the test valid for any
        # batch size (a bare array in `if` raises for more than one element).
        if torch.isnan(softmax_output).any():
            print('NaN probabilities at step {}: {}'.format(step, predicted))

        # Tensors produced under no_grad() carry no graph, so no detach() is needed.
        predictions.extend(predicted.cpu().numpy())
        probas.extend(softmax_output.cpu().numpy())

HBox(children=(IntProgress(value=0, max=7409), HTML(value='')))




In [5]:
# Sanity check: the loader was built with batch_size=1, so its length is the number
# of test samples and must equal the number of collected predictions.
len(predictions), len(test_loader)

(7409, 7409)

In [6]:
# Alternative source for the labels (not used):
# p_test_df = pd.read_csv("/home/petryshak/CoronaryArteryPlaqueIdentification/data/all_branches_with_pda_plv/test/labels.csv")

# Work on a re-indexed copy of the dataset's frame so the positional predictions
# below align with its rows (the old index is kept as an 'index' column).
p_test_df = test_loader.dataset.df.reset_index()
p_test_df['PRED'] = pd.Series(predictions)
p_test_df['PROBAS'] = pd.Series(probas)

# The patient identifier is the second component of the image path.
p_test_df['PATIENT'] = p_test_df['IMG_PATH'].map(lambda path: path.split('/')[1])

# Invert the config's {group: [scores...]} table into a score -> group lookup.
mapper = {
    score: group_id
    for group_id, scores in config['data']['groups'].items()
    for score in scores
}

# A row's label is the highest group among its stenosis scores.
# Assumes STENOSIS_SCORE already holds iterables of scores (no literal_eval is applied here).
p_test_df["LABELS"] = [
    max([mapper[score] for score in row_scores])
    for row_scores in p_test_df["STENOSIS_SCORE"]
]

In [None]:
# Shape of the frame once every row containing a missing value is dropped;
# comparing it with the full frame shows how many rows are incomplete.
p_test_df.dropna().shape

In [None]:
# Index labels of the rows that did not receive a probability vector.
p_test_df[p_test_df['PROBAS'].isnull()].index

In [7]:
# Persist the per-row predictions, probabilities and labels; the file name mirrors
# the experiment directory of the checkpoint in PATH_WEIGHTS.
p_test_df.to_csv('only_lad_label_smoothing_test.csv', index=False)

In [None]:
# Distribution of the ground-truth labels.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
p_test_df['LABELS'].value_counts()

In [None]:
# How many rows are predicted as class 1 while their label is class 0.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
((p_test_df['PRED'] == 1) & (p_test_df['LABELS'] == 0)).value_counts()

In [None]:
# Distribution of the predicted classes.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
p_test_df['PRED'].value_counts()

## Visualize

In [None]:
# dict_artery = {'LAD': ['D-1', 'D-2', 'LAD', 'D-3', '2D-2', 'D-1Original', 'LADOriginal', 'D-4'],
#                    'LCX': ['LCX', 'OM-2', 'OM-1', 'OM-3', 'OM', 'LCX-PLB', 'LCX-PDA', 'PLV_LCX', 'PDA_LCX'],
#                    'RCA': ['RCA', 'RCA-PLB', 'RCA-PDA', 'PLV_RCA']}
    
# p_test_df['artery'] = p_test_df['ARTERY_SECTION'].apply(lambda x: [k for k in dict_artery.keys() if x in dict_artery[k]][0])

In [None]:
# Number of labelled rows available for each artery section.
p_test_df.groupby('ARTERY_SECTION')['LABELS'].count()

In [None]:
def calculate_metrics(col_section, col_ids, col_preds, col_labels):
    """
    Calculate accuracy and weighted F1 on three levels: per section, per artery and per patient.

    Rows are first reduced to one value per (patient, section): the label is the
    maximum label of the group and the prediction is the most frequent predicted
    class. Artery- and patient-level values are then the maximum over the level below.

    :param col_section: {pd.Series} artery section name of every row
    :param col_ids: {pd.Series} patient identifier of every row
    :param col_preds: {pd.Series} predicted class of every row
    :param col_labels: {pd.Series} ground-truth class of every row
    :return: {dict} each metric as a key and its calculated metric as a value
    :raises IndexError: if a section name is not listed in the artery table below
    """
    assert len(col_section) == len(col_ids) == len(col_preds) == len(col_labels)

    metrics = {'ACC_section': 0, 'ACC_patient': 0, 'ACC_artery': 0, 'F1_section': 0, 'F1_patient': 0, 'F1_artery': 0}
    dict_artery = {'LAD': ['D-1', 'D-2', 'LAD', 'D-3', '2D-2', 'D-1Original', 'LADOriginal', 'D-4'],
                   'LCX': ['LCX', 'OM-2', 'OM-1', 'OM-3', 'OM', 'LCX-PLB', 'LCX-PDA', 'PLV_LCX', 'PDA_LCX'],
                   'RCA': ['RCA', 'RCA-PLB', 'RCA-PDA', 'PLV_RCA']}

    def _most_frequent(values):
        # value_counts() sorts by frequency, so the first index entry is the mode.
        return values.value_counts().index[0]

    def _score(y_true, y_pred):
        # scikit-learn expects (y_true, y_pred). Weighted F1 weights each class by
        # its support in y_true, so swapping the arguments changes the result.
        return accuracy_score(y_true, y_pred), f1_score(y_true, y_pred, average='weighted')

    df = pd.concat([col_ids, col_section, col_preds, col_labels], axis=1)
    df = df.rename(columns={col_section.name: 'section', col_ids.name: 'patient', col_preds.name:
        'preds', col_labels.name: 'labels'})

    df['artery'] = df['section'].apply(lambda x: [k for k in dict_artery.keys() if x in dict_artery[k]][0])

    # SECTION: one row per (patient, section). The artery is constant within a
    # section, so any reducer returns the same value for it.
    section_level = df.groupby(['patient', 'section']).agg(
        {'labels': 'max', 'preds': _most_frequent, 'artery': 'max'}).reset_index()
    metrics['ACC_section'], metrics['F1_section'] = _score(section_level['labels'], section_level['preds'])

    # ARTERY: worst (maximum) label / prediction over the sections of each artery.
    artery_level = section_level.groupby(['patient', 'artery'])[['labels', 'preds']].max().reset_index()
    metrics['ACC_artery'], metrics['F1_artery'] = _score(artery_level['labels'], artery_level['preds'])

    # PATIENT: worst (maximum) label / prediction over the arteries of each patient.
    patient_level = artery_level.groupby(['patient'])[['labels', 'preds']].max()
    metrics['ACC_patient'], metrics['F1_patient'] = _score(patient_level['labels'], patient_level['preds'])

    return metrics


In [None]:
# Evaluation frame restricted to rows without any missing value.
df = p_test_df.dropna()
# Optional filter: evaluate the 'LAD' section only.
# df = df[df['ARTERY_SECTION'].isin(['LAD'])]

In [None]:
# The bare string below is only a free-form run label; it has no effect on the result.
""" LR scheduler"""
# Metrics on `df`, i.e. the rows that survived dropna().
calculate_metrics(df['ARTERY_SECTION'], df['PATIENT'], df['PRED'], df['LABELS'])

In [None]:
# The bare string below is only a free-form run label; it has no effect on the result.
""" LR scheduler"""
# Same metrics computed on the complete frame (no dropna applied).
calculate_metrics(p_test_df['ARTERY_SECTION'], p_test_df['PATIENT'], p_test_df['PRED'], p_test_df['LABELS'])

## Group and analyze predictions

In [None]:
# NOTE(review): the label below names a different checkpoint than the file assigned
# to PATH_WEIGHTS in the model-loading cell -- confirm which run this cell documents.
"""model_model_34_val_f1=0.9360136.pth"""
# Metrics on the complete frame.
calculate_metrics(p_test_df['ARTERY_SECTION'], p_test_df['PATIENT'], p_test_df['PRED'], p_test_df['LABELS'])

In [None]:
# Metrics broken down by artery segment, in order of first appearance.
# The subset gets its own name so the global `df` (the dropna() frame consumed by
# the metric cells) is not overwritten, which would make those cells depend on
# execution order.
for segm in p_test_df['ARTERY_SECTION'].unique():
    print('ARTERY SEGMENT: {}'.format(segm))
    segm_df = p_test_df[p_test_df['ARTERY_SECTION'] == segm]
    rec_dict = calculate_metrics(segm_df['ARTERY_SECTION'], segm_df['PATIENT'], segm_df['PRED'], segm_df['LABELS'])
    print(rec_dict)
    print()