This notebook evaluates the model at the patient level:
1. Each patient may have several images.
2. For each patient and each disease, we keep the prediction (label and probability) from the image on which the model is most confident.
3. We load the correct (ground-truth) label of each disease for each patient.
4. We compute the AUC score for each disease over all patients.

In [7]:
# torch
import torch
import torch.nn as nn

import numpy as np

# model 
from models.densenet121 import make_model

# project
from utils.config_utils import load_config
from utils.Timer import Timer

# system
import time
import pickle
import pandas as pd

In [8]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from skimage import io

import numpy as np
import pandas as pd

import torchvision
from torchvision import transforms

In [26]:
# Preprocessing pipelines for training and validation images.
# Images are first resized to 364x364 and then cropped to 320x320; the ratio
# 364 / 320 is approximately the usual ImageNet ratio 256 / 224.
# Training additionally applies a random horizontal flip, since lateral
# pictures may be taken either from the left or from the right.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=[364, 364]),
    transforms.RandomResizedCrop(size=320),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Deterministic counterpart used for evaluation: centre crop, no flipping.
validation_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=[364, 364]),
    transforms.CenterCrop(size=320),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [27]:
class CheXpertEvalDataset(Dataset):
    """
    Evaluation dataset for CheXpert (https://stanfordmlgroup.github.io/competitions/chexpert/).

    Each item is a dictionary with the keys:
        - "image": the (optionally transformed) image, replicated to 3 channels
        - "patientID": the value of the "patientID" column for that row
        - one key per column in LABEL_COLUMNS, holding a LongTensor of shape (1,)
          with the class index (0 negative, 1 positive, 2 anything else)
    """

    # Observation columns read from the csv file, in the order in which they
    # are added to every sample dictionary.
    LABEL_COLUMNS = ("No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity",
                     "Lung Lesion", "Edema", "Consolidation", "Pneumonia", "Atelectasis",
                     "Pneumothorax", "Pleural Effusion", "Pleural Other", "Fracture", "Support Devices")

    def __init__(self, csv_file, image_root_dir, transform=None):
        """
        Args:
            - csv_file: path to the csv file listing the images, like "./data/CheXpert-v1.0-small/train_preprocessed.csv";
                        it must contain the columns "Path", "patientID" and every column in LABEL_COLUMNS
            - image_root_dir: prefix prepended (plain string concatenation) to the "Path" column, like "./data/"
            - transform: optional callable applied to each image
        """
        # for the basic ones
        self.data_frame = pd.read_csv(csv_file)
        self.image_root_dir = image_root_dir
        self.image_path = self.data_frame["Path"]
        self.patientid = self.data_frame["patientID"]

        # for the transformation
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the csv file."""
        # This must be a method of the class (not a function nested inside
        # __init__), otherwise len(dataset) and DataLoader cannot use it.
        return len(self.data_frame)

    def __getitem__(self, index):
        """
        Load the image and labels of row `index`.

        Args:
            - index: row label in the csv data frame
        Returns:
            - sample: dictionary described in the class docstring
        """
        # to read in the image
        image_filename = self.image_root_dir + self.image_path[index]
        image = io.imread(image_filename, as_gray=True)

        # the pre-trained network expects 3 channels, so the image is stacked
        # three times along a new leading axis and then moved to channel-last
        # (H, W, 3) -- assumes io.imread returned a 2-D (H, W) array
        image = np.repeat(image[None, ...], 3, axis=0).transpose(1, 2, 0)

        # transform the image if a transform was given
        if self.transform:
            image = self.transform(image)

        sample = {"image": image, "patientID": self.patientid[index]}

        # class index for each observation column:
        # 0 --> negative (0.0)
        # 1 --> positive (1.0)
        # 2 --> everything else (e.g. uncertain or missing values)
        for label in self.LABEL_COLUMNS:
            value = self.data_frame[label][index]
            if value == 0.0:
                sample[label] = torch.LongTensor([0])
            elif value == 1.0:
                sample[label] = torch.LongTensor([1])
            else:
                sample[label] = torch.LongTensor([2])

        return sample

In [59]:
# Build the evaluation dataset with the validation preprocessing and fetch
# its first sample to check that loading works.
eval_dataset = CheXpertEvalDataset(
    csv_file="./valid_patientid.csv",
    image_root_dir="../data/",
    transform=validation_transform,
)
data_sample = eval_dataset[0]

In [60]:
# Sanity check: shape of the image stored in the first sample
data_sample["image"].shape

torch.Size([3, 320, 320])

In [61]:
def evaluation(sample_dict, model, device):
    """
    Classify a single sample with the model.

    Args:
        - sample_dict: dictionary of one evaluation sample; only the "image"
                       entry is used (a tensor without batch dimension)
        - model: deep learning model we want to evaluate
        - device: cpu or gpu
    Returns:
        - pred_prob_dict: for each label name, the larger of the two softmax
                          probabilities (negative vs. positive)
        - pred_label_dict: for each label name, the class with that larger
                           probability (0 = negative, 1 = positive)
    """
    # (label name, column of its "negative" logit in the model output).
    # The "positive" logit is the next column. The offsets follow the original
    # note that the output has 41 columns -- presumably 2 for "No Finding" and
    # 3 (negative, positive, uncertain) for every other label.
    head_offsets = (
        ("No Finding", 0),
        ("Enlarged Cardiomediastinum", 2),
        ("Cardiomegaly", 5),
        ("Lung Opacity", 8),
        ("Lung Lesion", 11),
        ("Edema", 14),
        ("Consolidation", 17),
        ("Pneumonia", 20),
        ("Atelectasis", 23),
        ("Pneumothorax", 26),
        ("Pleural Effusion", 29),
        ("Pleural Other", 32),
        ("Fracture", 35),
        ("Support Devices", 38),
    )

    # switch the model to evaluation mode
    model.eval()

    # gradients are not needed for inference
    with torch.no_grad():
        to_probability = nn.Softmax(dim=1)

        # add a batch dimension, convert to float and move to the device
        batch = sample_dict["image"].unsqueeze(0).float().to(device)
        logits = model(batch)

        pred_prob_dict = {}
        pred_label_dict = {}
        for label_name, start in head_offsets:
            # only the negative and positive logits are compared; the softmax
            # over those two turns them into probabilities
            two_class_probs = to_probability(logits[:, start:start + 2])
            top_prob, top_class = torch.max(two_class_probs, dim=1)
            pred_prob_dict[label_name] = top_prob.cpu().item()
            pred_label_dict[label_name] = top_class.cpu().item()

        return pred_prob_dict, pred_label_dict

In [95]:
# test
params = load_config('config.yaml')
model = make_model(pretrained=params['pretrained'])
PATH_pretrained = "./models/session_2019-05-05[22_03_43]/model_2.pth"
model.load_state_dict(torch.load(PATH_pretrained))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [96]:
# Run on the first GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model.to(device)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplac

In [97]:
# Smoke test: run the model on the single sample loaded earlier
pred_prob_dict, pred_label_dict = evaluation(data_sample, model, device)

# Get all labels and corresponding probability

In [98]:
# Load the validation csv (the same file the evaluation dataset was built
# from) so the rows can be grouped by patientID
with open("./valid_patientid.csv", "rt") as fin:
    valid_set = pd.read_csv(fin)

In [99]:
# Patient-level aggregation of the image-level predictions.
# Result layout: {patientID: {disease: (predicted label, probability of that label)}}
disease_names = [
    'No Finding',
    'Enlarged Cardiomediastinum',
    'Cardiomegaly',
    'Lung Opacity',
    'Lung Lesion',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
    'Pleural Other',
    'Fracture',
    'Support Devices',
]

patients_detection_results = {}
for patient in pd.unique(valid_set["patientID"]):
    # all csv rows (one per image) that belong to this patient
    patient_records = valid_set.loc[valid_set["patientID"] == patient]

    # per-disease predictions of ONE patient, one list entry per image
    patient_prob_results = {name: [] for name in disease_names}
    patient_label_results = {name: [] for name in disease_names}

    # classify every image of the patient
    for row_index in patient_records.index:
        data_sample = eval_dataset.__getitem__(row_index)
        pred_prob_dict, pred_label_dict = evaluation(data_sample, model, device)
        for disease in pred_prob_dict:
            patient_prob_results[disease].append(pred_prob_dict[disease])
        for disease in pred_label_dict:
            patient_label_results[disease].append(pred_label_dict[disease])

    # for each disease keep the prediction of the most confident image
    patient_results = {}
    for disease, probs in patient_prob_results.items():
        max_prob = max(probs)
        # position of the first image that reaches the maximum; if no entry
        # compares equal, the last position is used
        best_image = next(
            (pos for pos, prob in enumerate(probs) if prob == max_prob),
            len(probs) - 1,
        )
        patient_results[disease] = (patient_label_results[disease][best_image], max_prob)

    # store the ONE patient result into the ALL patients result
    patients_detection_results[patient] = patient_results

In [100]:
# Inspect the aggregated (label, probability) pair per patient and disease
patients_detection_results

{'patient64541': {'No Finding': (0, 0.5853316783905029),
  'Enlarged Cardiomediastinum': (1, 0.5151982307434082),
  'Cardiomegaly': (0, 0.6685343980789185),
  'Lung Opacity': (0, 0.6978243589401245),
  'Lung Lesion': (0, 0.8473894596099854),
  'Edema': (0, 0.721504807472229),
  'Consolidation': (0, 0.8154464960098267),
  'Pneumonia': (0, 0.9189329147338867),
  'Atelectasis': (1, 0.6174995303153992),
  'Pneumothorax': (1, 0.5347614288330078),
  'Pleural Effusion': (0, 0.8297180533409119),
  'Pleural Other': (0, 0.9273844957351685),
  'Fracture': (1, 0.5103188753128052),
  'Support Devices': (0, 0.8093063831329346)},
 'patient64542': {'No Finding': (1, 0.899721086025238),
  'Enlarged Cardiomediastinum': (0, 0.6581596732139587),
  'Cardiomegaly': (0, 0.9304030537605286),
  'Lung Opacity': (0, 0.8919954299926758),
  'Lung Lesion': (0, 0.6754837036132812),
  'Edema': (0, 0.9964461922645569),
  'Consolidation': (0, 0.8988915681838989),
  'Pneumonia': (0, 0.8631916046142578),
  'Atelectasis':

# To get the labels and corresponding probability lists

In [101]:
# Predicted label of every patient, collected per disease
all_patients_label_lists = {
    disease: []
    for disease in (
        'No Finding',
        'Enlarged Cardiomediastinum',
        'Cardiomegaly',
        'Lung Opacity',
        'Lung Lesion',
        'Edema',
        'Consolidation',
        'Pneumonia',
        'Atelectasis',
        'Pneumothorax',
        'Pleural Effusion',
        'Pleural Other',
        'Fracture',
        'Support Devices',
    )
}

# Matching score of every patient, collected per disease (same keys, fresh lists)
all_patients_prob_lists = {disease: [] for disease in all_patients_label_lists}

In [102]:
# Flatten the per-patient results into per-disease lists.
# The stored probability belongs to the predicted label, so for a predicted
# label of 0 it is converted to the probability of the positive class.
# Note: this cell appends, so the two dictionaries must be re-created before
# it is run again.
for results in patients_detection_results.values():
    for disease, (label, prob) in results.items():
        all_patients_label_lists[disease].append(label)
        positive_prob = prob if label != 0 else 1.0 - prob
        all_patients_prob_lists[disease].append(positive_prob)

In [103]:
# Inspect the positive-class scores collected per disease
all_patients_prob_lists

{'No Finding': [0.41466832160949707,
  0.899721086025238,
  0.12234658002853394,
  0.9250892996788025,
  0.15514779090881348,
  0.22368144989013672,
  0.7614573240280151,
  0.075350821018219,
  0.515397846698761,
  0.8143438100814819,
  0.7454822659492493,
  0.02344447374343872,
  0.7083576917648315,
  0.7724113464355469,
  0.06844562292098999,
  0.7658005356788635,
  0.23620152473449707,
  0.8343762159347534,
  0.808587908744812,
  0.3603034019470215,
  0.6082830429077148,
  0.8348233699798584,
  0.8557001352310181,
  0.43953877687454224,
  0.24432289600372314,
  0.9072871208190918,
  0.7887603044509888,
  0.8247876763343811,
  0.5169580578804016,
  0.28492534160614014,
  0.09483414888381958,
  0.8693619966506958,
  0.24402904510498047,
  0.7614042162895203,
  0.8006822466850281,
  0.8238987922668457,
  0.05984973907470703,
  0.8108857870101929,
  0.23088431358337402,
  0.02099078893661499,
  0.41446930170059204,
  0.22859162092208862,
  0.04300832748413086,
  0.7959983348846436,
  0.

# To get the correct label for each disease

In [104]:
# to get the true labels
true_label_filename = "/scratch/hw1666/final_projects/dl4med/ablation_study/no_lateral_views/CheXpert_no_lateral_views/evaluation_results/correct_labels.pkl"
with open(true_label_filename, "rb") as fin:
    true_labels_dict = pickle.load(fin)

In [105]:
# Inspect the loaded ground-truth labels
true_labels_dict

{'patient64541': {'No Finding': 0,
  'Enlarged Cardiomediastinum': 1,
  'Cardiomegaly': 1,
  'Lung Opacity': 1,
  'Lung Lesion': 0,
  'Edema': 0,
  'Consolidation': 0,
  'Pneumonia': 0,
  'Atelectasis': 0,
  'Pneumothorax': 0,
  'Pleural Effusion': 0,
  'Pleural Other': 0,
  'Fracture': 0,
  'Support Devices': 0},
 'patient64542': {'No Finding': 0,
  'Enlarged Cardiomediastinum': 0,
  'Cardiomegaly': 0,
  'Lung Opacity': 0,
  'Lung Lesion': 0,
  'Edema': 0,
  'Consolidation': 0,
  'Pneumonia': 0,
  'Atelectasis': 0,
  'Pneumothorax': 0,
  'Pleural Effusion': 0,
  'Pleural Other': 0,
  'Fracture': 0,
  'Support Devices': 1},
 'patient64543': {'No Finding': 0,
  'Enlarged Cardiomediastinum': 1,
  'Cardiomegaly': 0,
  'Lung Opacity': 1,
  'Lung Lesion': 0,
  'Edema': 1,
  'Consolidation': 0,
  'Pneumonia': 0,
  'Atelectasis': 0,
  'Pneumothorax': 0,
  'Pleural Effusion': 0,
  'Pleural Other': 0,
  'Fracture': 0,
  'Support Devices': 0},
 'patient64544': {'No Finding': 1,
  'Enlarged Cardi

In [106]:
# Ground-truth label of every patient, collected per disease
all_patients_correct_label_lists = {
    disease: []
    for disease in (
        'No Finding',
        'Enlarged Cardiomediastinum',
        'Cardiomegaly',
        'Lung Opacity',
        'Lung Lesion',
        'Edema',
        'Consolidation',
        'Pneumonia',
        'Atelectasis',
        'Pneumothorax',
        'Pleural Effusion',
        'Pleural Other',
        'Fracture',
        'Support Devices',
    )
}

# The AUC computation pairs true labels and predicted scores by position, and
# the scores were appended in the iteration order of patients_detection_results.
# Walk the patients in that same order and look the true labels up by patient
# ID, instead of relying on the pickled dictionary having an identical order.
# A patient or disease missing from true_labels_dict raises a KeyError rather
# than silently shifting the lists against each other.
for patient in patients_detection_results:
    patient_true_labels = true_labels_dict[patient]
    for disease, labels_list in all_patients_correct_label_lists.items():
        labels_list.append(patient_true_labels[disease])

In [107]:
#all_patients_correct_label_lists

# Compute the AUC score for each disease

In [108]:
from sklearn.metrics import roc_auc_score

In [109]:
# Print the ROC AUC of every disease over all patients.
# "Fracture" is skipped -- presumably because the validation labels contain
# only one class for it, in which case ROC AUC is undefined (TODO confirm).
for disease in all_patients_correct_label_lists:
    if disease != "Fracture":
        y_true = np.array(all_patients_correct_label_lists[disease])
        y_score = np.array(all_patients_prob_lists[disease])
        print(disease, "AUC: ", roc_auc_score(y_true, y_score), "\n")

No Finding AUC:  0.9064986737400531 

Enlarged Cardiomediastinum AUC:  0.5408521303258145 

Cardiomegaly AUC:  0.7976028946178201 

Lung Opacity AUC:  0.895935960591133 

Lung Lesion AUC:  0.19095477386934678 

Edema AUC:  0.8948161543098252 

Consolidation AUC:  0.8757440476190477 

Pneumonia AUC:  0.7220052083333334 

Atelectasis AUC:  0.7780266666666666 

Pneumothorax AUC:  0.8393470790378006 

Pleural Effusion AUC:  0.9286534926470589 

Pleural Other AUC:  0.9396984924623115 

Support Devices AUC:  0.8569856985698572 



# Result Analysis

In [2]:
import pandas as pd

In [3]:
# open the validation set
with open("./valid_patientid.csv", "rt") as fin:
    valid_set_ori = pd.read_csv(fin)

In [51]:
# Keep only the "Support Devices" label and the view type of each row.
# NOTE(review): despite its name, this frame does not hold the
# "Enlarged Cardiomediastinum" column.
valid_set_en_card = valid_set_ori[["Support Devices", "Frontal/Lateral"]]

In [52]:
# Number of frontal-view rows for each "Support Devices" label value
valid_set_en_card.loc[valid_set_ori["Frontal/Lateral"]=="Frontal"].groupby("Support Devices").count()

Unnamed: 0_level_0,Frontal/Lateral
Support Devices,Unnamed: 1_level_1
0.0,103
1.0,99


In [53]:
# Number of lateral-view rows for each "Support Devices" label value
valid_set_en_card.loc[valid_set_ori["Frontal/Lateral"]=="Lateral"].groupby("Support Devices").count()

Unnamed: 0_level_0,Frontal/Lateral
Support Devices,Unnamed: 1_level_1
0.0,24
1.0,8
