# Imports

In [1]:
import os
import numpy as np
import time
import sys
import csv
import cv2
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as tfunc
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image
import torch.nn.functional as func
import torchxrayvision as xrv
from tqdm.notebook import tqdm

from sklearn.metrics import roc_auc_score
import sklearn.metrics as metrics
import random
import logging


use_gpu = torch.cuda.is_available()

In [3]:
import pandas as pd
path = "/om/user/shobhita/src/chexpert/data/CheXpert-v1.0-small/"
train = pd.read_csv(path + "train_preprocessed.csv")

In [4]:
train

Unnamed: 0.1,Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,...,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,ID
0,173223,CheXpert-v1.0-small/train/patient40439/study1/...,Female,48,Frontal,AP,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40439
1,103369,CheXpert-v1.0-small/train/patient24820/study2/...,Female,69,Frontal,PA,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24820
2,55913,CheXpert-v1.0-small/train/patient13550/study3/...,Female,25,Lateral,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,13550
3,192326,CheXpert-v1.0-small/train/patient46406/study1/...,Female,45,Frontal,AP,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46406
4,185864,CheXpert-v1.0-small/train/patient44121/study1/...,Male,51,Frontal,AP,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,44121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201386,27122,CheXpert-v1.0-small/train/patient06606/study2/...,Male,78,Frontal,AP,0.0,0.0,0.0,1.0,...,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,6606
201387,124987,CheXpert-v1.0-small/train/patient29924/study1/...,Female,84,Frontal,AP,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,29924
201388,211800,CheXpert-v1.0-small/train/patient56212/study2/...,Female,90,Frontal,AP,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,56212
201389,130616,CheXpert-v1.0-small/train/patient31303/study1/...,Male,45,Lateral,,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31303


#  Get data and model

In [14]:
path = "/om/user/shobhita/src/chexpert/data/CheXpert-v1.0-small/"
def load_data():
    # add data augmentations transforms here
    transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),
                                                xrv.datasets.XRayResizer(224)])
    # replace the paths for the dataset here
    d_chex_train = xrv.datasets.CheX_Dataset(imgpath=path,
                                       csvpath=path + "train_preprocessed.csv",
                                       transform=transform,views=["PA", "AP"], unique_patients=False)
    d_chex_test = xrv.datasets.CheX_Dataset(imgpath=path,
                                       csvpath=path + "test_train_preprocessed.csv",
                                       transform=transform,views=["PA", "AP"], unique_patients=False)
    return d_chex_train, d_chex_test

def get_model():
    model = xrv.models.DenseNet(num_classes=13)
    print(model.classifier)
    return model

In [9]:
def preprocess_data(dataset):
    for idx, data in enumerate(dataset):
        data['lab']=np.nan_to_num(data['lab'],0)
        data['lab']=np.where(data['lab']==-1, 1, data['lab']) 
    return dataset

# Training loop

In [10]:
def training(model,num_epochs,path_trained_model,train_loader,valid_loader):
    print("training")
    # hyperparameters
    criterion = nn.BCEWithLogitsLoss() 
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    best_valid_loss=10000
    PATH = path_trained_model
    
    # going through epochs
    for epoch in range(num_epochs):
        # training loss
        print("epoch",epoch)
        model.train()
        model.to("cuda:0")
        train_loss = 0
        count=0
        for data_all in train_loader:
            data=data_all['img']
            target=data_all['lab']
            count+=1
            if count % 100 == 0:
                print("data ", count)
            data = data.to("cuda:0")
            target = target.to("cuda:0")
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # validation loss
        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for data_all in valid_loader:
                data=data_all['img']
                target=data_all['lab']
                data = data.to("cuda:0")
                target = target.to("cuda:0")
                output = model(data)
                loss = criterion(output, target) 
                valid_loss += loss.item()
        train_loss /= len(train_loader)
        valid_loss /= len(valid_loader)
        
        # saves best epoch
        print(f'Epoch: {epoch+1}/{num_epochs}.. Training loss: {train_loss}.. Validation Loss: {valid_loss}')
        if valid_loss < best_valid_loss:
            torch.save(model.state_dict(), PATH)
            best_valid_loss=valid_loss
        print("Best Valid Loss so far:", best_valid_loss)

# Testing loop

In [11]:
def computeAUROC(dataGT, dataPRED, classCount):
        
    outAUROC = []
        
    datanpGT = dataGT.cpu().numpy()
    datanpPRED = dataPRED.cpu().numpy()
        
    for i in range(classCount):
        try:
            outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
        except ValueError:
            pass
    return outAUROC

def testing(model, test_loader, nnClassCount, class_names):
    if use_gpu:
        outGT = torch.FloatTensor().cuda()
        outPRED = torch.FloatTensor().cuda()
       
    model.eval()
        
    with torch.no_grad():
        for batch_idx, data_all in tqdm(enumerate(test_loader)):
            if batch_idx % 100 == 0:
                print(batch_idx)
            
            data=data_all['img']
            target=data_all['lab']
            target = target.cuda()
            data = data.to("cuda:0")
            outGT = torch.cat((outGT, target), 0).cuda()

            #bs, c, h, w = data.size()
            #varInput = data.view(-1, c, h, w)
            
            out = model(data)
            outPRED = torch.cat((outPRED, out), 0)
    aurocIndividual = computeAUROC(outGT, outPRED, nnClassCount)
    aurocMean = np.array(aurocIndividual).mean()
        
    print ('AUROC mean ', aurocMean)
        
    for i in range (0, len(aurocIndividual)):
        print (class_names[i], ' ', aurocIndividual[i])
        
    return outGT, outPRED


In [15]:
train,test=load_data()

Setting XRayResizer engine to cv2 could increase performance.


In [16]:
len(train)

172214

In [13]:
len(train)

225333

In [26]:
transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),
                                                xrv.datasets.XRayResizer(224)])
d_chex_train = xrv.datasets.CheX_Dataset(imgpath=path,
                                       csvpath=path + "train.csv",
                                       transform=transform,views=["PA", "AP"], unique_patients=False)

Setting XRayResizer engine to cv2 could increase performance.


In [27]:
len(d_chex_train)

191010

In [8]:
trBatchSize=32
datasetTest, datasetTrain = random_split(train, [500, len(train) - 500])  
dataLoaderTrain = DataLoader(dataset=datasetTrain, batch_size=trBatchSize, shuffle=True,  num_workers=24, pin_memory=True)
dataLoaderVal = DataLoader(dataset=datasetValid, batch_size=trBatchSize, shuffle=False, num_workers=24, pin_memory=True)
dataLoaderTest = DataLoader(dataset=datasetTest, num_workers=24, pin_memory=True)

In [9]:
model=get_model()
training(model=model,num_epochs=2,path_trained_model="densenet_model",train_loader=dataLoaderTrain,valid_loader=dataLoaderVal)

Linear(in_features=1024, out_features=13, bias=True)
training
epoch 0
data  100
data  200
data  300
data  400
data  500
data  600
Epoch: 1/2.. Training loss: 0.4020114891837088.. Validation Loss: 0.24762111902236938
Best Valid Loss so far: 0.24762111902236938
epoch 1
data  100
data  200
data  300
data  400
data  500
data  600
Epoch: 2/2.. Training loss: 0.3808065941364191.. Validation Loss: 0.24135379493236542
Best Valid Loss so far: 0.24135379493236542


In [60]:
class_names=['Enlarged Cardiomediastinum', 'Cardiomegaly',
       'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
       'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
       'Fracture', 'Support Devices']
testing(model, dataLoaderTest, len(class_names), class_names)

0it [00:00, ?it/s]

0
100
200
300
400
AUROC mean  0.6153491351471989
Enlarged Cardiomediastinum   0.6704626009009628
Cardiomegaly   0.6818390804597702
Lung Opacity   0.6442086648983202
Lung Lesion   0.7087035771246297
Edema   0.5763769698860977
Consolidation   0.503578947368421
Pneumonia   0.488328401672068
Atelectasis   0.6266213921901528
Pneumothorax   0.732669550959126
Pleural Effusion   0.561584840654608
Pleural Other   0.5930181175430844
Fracture   0.5623404255319149
Support Devices   0.6498061877244299


(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 1., 1., 0.]], device='cuda:0'),
 tensor([[-1.6123, -2.3601, -2.0904,  ..., -2.1116, -2.7364, -1.6877],
         [-1.7371, -2.3061, -2.1212,  ..., -2.1768, -2.9851, -1.7004],
         [-1.0029, -1.8266, -1.7144,  ..., -1.9648, -2.6303, -1.4148],
         ...,
         [-1.4946, -2.0531, -1.9153,  ..., -1.9167, -2.6913, -1.6776],
         [-1.8068, -2.3387, -2.1711,  ..., -2.1820, -2.8148, -1.7631],
         [-0.8649, -1.7069, -1.5956,  ..., -1.8459, -2.5626, -1.2829]],
        device='cuda:0'))