In [1]:
# PyTorch Modules
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import torchvision
from torchvision import models
from torchvision import transforms
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Other non-PyTorch Modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image
import time
from datetime import datetime
import pickle
import json
import random

In [2]:
# Ask PyTorch's caching allocator to release GPU memory it holds but is not
# currently using, so a re-run of the notebook starts from a cleaner state.
torch.cuda.empty_cache()

In [3]:
# Load the per-image metadata/label table (it carries a 'fold' column used
# below for the train/validation split).
dataDF = pd.read_csv('data_fold.csv')
# Key rows by image id so labels can be looked up with dataDF.loc[<SOPInstanceUID>].
dataDF = dataDF.set_index('SOPInstanceUID')

In [4]:
# Inspect the columns available after indexing by SOPInstanceUID.
dataDF.columns

Index(['StudyInstanceUID', 'SeriesInstanceUID', 'pe_present_on_image',
       'negative_exam_for_pe', 'qa_motion', 'qa_contrast', 'flow_artifact',
       'rv_lv_ratio_gte_1', 'rv_lv_ratio_lt_1', 'leftsided_pe', 'chronic_pe',
       'true_filling_defect_not_pe', 'rightsided_pe', 'acute_and_chronic_pe',
       'central_pe', 'indeterminate', 'window_center', 'window_width',
       'intercept', 'slope', 'slice_thickness', 'kvp', 'ma', 'exposure',
       'img_pos', 'conv_kernel', 'patient_position', 'pixel_spacing',
       'bits_stored', 'high_bit', 'img_count', 'fold'],
      dtype='object')

In [5]:
# Keep only the columns used below and fix their ORDER.
# The order matters: embeddingsDataset slices positions 3..11 of a row to get
# the nine study-level labels ('negative_exam_for_pe' .. 'rv_lv_ratio_lt_1').
dataDF = dataDF.reindex(columns=['StudyInstanceUID', 'SeriesInstanceUID', 'pe_present_on_image', 'negative_exam_for_pe',
                       'indeterminate', 'chronic_pe', 'acute_and_chronic_pe', 'central_pe', 'leftsided_pe',
                       'rightsided_pe', 'rv_lv_ratio_gte_1', 'rv_lv_ratio_lt_1','fold','img_pos','patient_position',
                       'intercept', 'slope'])                       

In [6]:
# Preview the first rows to confirm the new column order.
dataDF.head()

Unnamed: 0_level_0,StudyInstanceUID,SeriesInstanceUID,pe_present_on_image,negative_exam_for_pe,indeterminate,chronic_pe,acute_and_chronic_pe,central_pe,leftsided_pe,rightsided_pe,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,fold,img_pos,patient_position,intercept,slope
SOPInstanceUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
c0f3cb036d06,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,0,1,3,-234.5,HFS,-1024,1
f57ffd3883b6,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,0,1,3,-252.5,HFS,-1024,1
41220fda34a3,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,0,1,3,-432.5,HFS,-1024,1
13b685b4b14f,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,0,1,3,-434.5,HFS,-1024,1
be0b7524ffb4,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,0,1,3,-436.5,HFS,-1024,1


In [7]:
# Persist the trimmed/reordered table (index SOPInstanceUID included) for later reference.
dataDF.to_csv('for_reference.csv')

In [8]:
# Hold out fold 4 for validation; every other fold is used for training.
trainDF = dataDF[dataDF['fold']!=4]
valDF = dataDF[dataDF['fold']==4]

In [9]:
# Directory holding one pickle ('<StudyInstanceUID>.p') per study; must end with '/'
# because embeddingsDataset builds file paths by plain string concatenation.
# NOTE(review): the path ends in 'val/' yet it is read for BOTH the training and
# the validation dataset below — confirm that the training studies live here too.
embeddingDirPath = 'data/embeddings/CNNmodel_01_epoch1_20201005_1533/val/'

In [10]:
class embeddingsDataset(Dataset):
    """Dataset yielding, per study, its sequence of image embeddings and labels.

    Each study is stored as a pickle named ``<StudyInstanceUID>.p`` containing a
    dict with keys ``'embeddings'`` (one embedding per image) and ``'ids'``
    (the matching image ids, which must be index values of ``dataDF``).

    Args:
        dataDF: label table indexed by image id. Positions 3..11 of a row must
            be the nine study-level label columns and the table must contain a
            ``'pe_present_on_image'`` column.
        listOfStudies: sequence of StudyInstanceUID strings; one item per study.
        embeddingDir: directory prefix (including the trailing path separator)
            that holds the pickles. When ``None`` the notebook-level
            ``embeddingDirPath`` is used, as before.
    """

    def __init__(self, dataDF = None, listOfStudies = None, embeddingDir = None):
        self.dataDF = dataDF
        self.listOfStudies = listOfStudies
        self.embeddingDir = embeddingDir

    def __len__(self):
        """Number of studies in the dataset."""
        return len(self.listOfStudies)

    def __getitem__(self, idx):
        """Return ``(embeddingVolume, (imageLevelLabels, studyLevelLabels))`` for study ``idx``.

        ``imageLevelLabels`` and ``studyLevelLabels`` are float32 numpy arrays;
        ``embeddingVolume`` is whatever ``np.array`` makes of the stored embeddings.
        """
        # Resolve the directory lazily so the global can still be changed between runs.
        embeddingDir = embeddingDirPath if self.embeddingDir is None else self.embeddingDir

        # SECURITY: pickle.load executes arbitrary code from the file; only point
        # this dataset at embedding files you produced yourself.
        # The context manager closes the handle (the original leaked one per item).
        with open(embeddingDir + self.listOfStudies[idx] + '.p', 'rb') as embedFile:
            embedDict = pickle.load(embedFile)

        embeddingVolume = np.array(embedDict['embeddings'])
        listOfImages = embedDict['ids']

        # One vectorized lookup instead of one .loc call per image.
        imageLevelLabels = self.dataDF.loc[list(listOfImages), 'pe_present_on_image']
        imageLevelLabels = np.array(imageLevelLabels).astype(np.float32)

        # Study-level labels are identical for every image of a study, so the
        # first image's row is used. .iloc makes the positional slice explicit.
        studyLevelLabels = self.dataDF.loc[listOfImages[0]].iloc[3:12].values
        studyLevelLabels = np.array(studyLevelLabels).astype(np.float32)
        return embeddingVolume, (imageLevelLabels, studyLevelLabels)

In [11]:
# One dataset item == one study. The full dataDF is passed for label lookup;
# the fold-specific frames only supply the list of study ids.
# batch_size=1: each batch holds a single study (presumably because studies
# contain different numbers of images and are not padded — TODO confirm).
trainEmbeddingsDataset = embeddingsDataset(dataDF=dataDF, listOfStudies=trainDF['StudyInstanceUID'].unique())
trainEmbeddingsDataloader = DataLoader(trainEmbeddingsDataset, batch_size=1, shuffle=True, num_workers=1)

# Validation order is kept fixed (shuffle=False).
valEmbeddingsDataset = embeddingsDataset(dataDF=dataDF, listOfStudies=valDF['StudyInstanceUID'].unique())
valEmbeddingsDataloader = DataLoader(valEmbeddingsDataset, batch_size=1, shuffle=False, num_workers=1)

In [12]:
# Hyper-parameters passed to BiGRU below.
INPUT_SIZE = 64    # feature size of one image embedding (last dim of the GRU input)
HIDDEN_SIZE = 32   # GRU hidden size per direction
NUM_LAYERS = 1     # number of stacked GRU layers
NUM_CLASSES = 1    # outputs per image (a single logit)

class BiGRU(nn.Module):
    """Bidirectional GRU over a study's image embeddings with two output heads.

    * image-level head: one prediction per sequence step (``linear1`` -> ``linear2``)
    * study-level head: nine logits per study from the final hidden states
      (``linear3`` -> ``linear4``)

    Args:
        input_size: feature size of each sequence element.
        hidden_size: GRU hidden size per direction.
        num_layers: number of stacked GRU layers.
        num_classes: outputs per image; with 1 the class dimension is squeezed away.
        dropout: dropout probability applied after the first linear layer of each
            head (only while the module is in training mode).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout = 0.1):
        super(BiGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.GRU = nn.GRU(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        self.dropout = dropout
        # Image-level head (input is forward+backward output, hence hidden_size*2).
        self.linear1 = nn.Linear(hidden_size*2, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)
        # Study-level head (nine study-level labels).
        self.linear3 = nn.Linear(hidden_size*2, hidden_size)
        self.linear4 = nn.Linear(hidden_size, 9)

    def forward(self, x):
        """Run the network on ``x`` of shape (batch, seq_len, input_size).

        Returns:
            (imageLevelOutputs, studyLevelOutputs): logits of shape
            (batch, seq_len) when ``num_classes == 1`` (otherwise
            (batch, seq_len, num_classes)) and (batch, 9).
        """
        # Zero initial state created on the input's device/dtype, so the module
        # works wherever it has been moved instead of requiring CUDA.
        h0 = x.new_zeros(self.num_layers * 2, x.size(0), self.hidden_size)

        out, h_n = self.GRU(x, h0)

        # nn.Linear acts on the last dimension, so the whole sequence can be
        # processed in one call instead of looping over time steps.
        imageLevelOutputs = F.relu(self.linear1(out))
        # training=self.training: F.dropout defaults to training=True and would
        # otherwise keep dropping units after model.eval().
        imageLevelOutputs = F.dropout(imageLevelOutputs, p=self.dropout, training=self.training)
        imageLevelOutputs = self.linear2(imageLevelOutputs).squeeze(2)

        # h_n is (num_layers * 2, batch, hidden); the last two entries are the
        # forward and backward final states of the top layer. Concatenating them
        # per sample gives (batch, hidden*2). For batch == 1 and one layer this
        # equals the previous h_n.view(1, -1), but it also works for other sizes.
        lastHidden = torch.cat([h_n[-2], h_n[-1]], dim=1)
        studyLevelOutputs = F.relu(self.linear3(lastHidden))
        studyLevelOutputs = F.dropout(studyLevelOutputs, p=self.dropout, training=self.training)
        studyLevelOutputs = self.linear4(studyLevelOutputs)

        return (imageLevelOutputs, studyLevelOutputs)

In [13]:
# Instantiate the model on the GPU; `seq` is the model object used by all later cells.
seq = BiGRU(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, NUM_CLASSES, dropout=0.2).cuda()

# Smoke test with a random input laid out as (batch, sequence length, features).
sampleInput = torch.rand((1,120,64)).cuda()
sampleImgOutput, sampleStdOutput = seq(sampleInput)
# Print the shapes of the image-level and study-level outputs.
print(sampleImgOutput.size())
print(sampleStdOutput.size())

In [14]:
def customLoss(imageLevelOutputLogits, imageLevelLabels, studyLeveloutputLogits, studyLevelLabels):
    """Combined image-level and study-level binary cross-entropy loss.

    Args:
        imageLevelOutputLogits: per-image logits.
        imageLevelLabels: per-image 0/1 targets, same shape as the logits.
        studyLeveloutputLogits: study-level logits of shape (batch, >=9).
        studyLevelLabels: study-level 0/1 targets, same shape as the logits.

    Returns:
        Scalar tensor: mean image-level BCE (positives weighted 2x) plus the
        weighted sum of the nine study-level BCE terms.
    """
    # Build helper tensors on the logits' device/dtype instead of forcing CUDA.
    posWeight = torch.tensor(
        [2.0], dtype=imageLevelOutputLogits.dtype, device=imageLevelOutputLogits.device
    )
    imageLevelLoss = F.binary_cross_entropy_with_logits(
        imageLevelOutputLogits, imageLevelLabels, pos_weight=posWeight
    )

    # Per-label weights, in the order of the study-level label columns.
    weightList = [0.0736196319, 0.09202453988, 0.1042944785, 0.1042944785, 0.1877300613, 0.06257668712, 0.06257668712, 0.2346625767, 0.0782208589]
    studyWeights = torch.tensor(
        weightList, dtype=studyLeveloutputLogits.dtype, device=studyLeveloutputLogits.device
    )

    # Element-wise BCE over the first nine labels, then weight and sum — the
    # same value the per-label loop with reduction='sum' produced.
    numLabels = len(weightList)
    perLabelLoss = F.binary_cross_entropy_with_logits(
        studyLeveloutputLogits[:, :numLabels], studyLevelLabels[:, :numLabels], reduction='none'
    )
    studyLevelLoss = (perLabelLoss * studyWeights).sum()

    return imageLevelLoss+studyLevelLoss

In [15]:
# Adam over all parameters of `seq`; train_loop reads this notebook-level object.
optimizer = optim.Adam(seq.parameters(), lr=1e-4)

In [17]:
def train_loop(model, train_loader):
    """Train ``model`` for one pass over ``train_loader``.

    Uses the notebook-level ``optimizer`` (which must have been built from
    ``model``'s parameters) and ``customLoss``.

    Args:
        model: network returning ``(image_logits, study_logits)``.
        train_loader: iterable of ``x, (y_img, y_std)`` batches.

    Returns:
        (train_cost, train_acc): the SUM of the batch losses over the epoch and
        the image-level accuracy at a 0.5 probability threshold (0.0 when the
        loader yields no images).
    """
    train_total = train_correct = train_cost = 0
    # Operate on the model that was passed in (the original silently used the
    # global `seq`) and follow whatever device it lives on.
    device = next(model.parameters()).device
    model.train()
    for x, (y_img, y_std) in tqdm(train_loader):
        x = x.to(device)
        y_img = y_img.to(device)
        y_std = y_std.to(device)
        optimizer.zero_grad()
        (o_img, o_std) = model(x)
        # Count every image in the batch, not only those of the first sample.
        train_total += y_img.numel()
        train_correct += ((torch.sigmoid(o_img)>0.5) == (y_img>0.5)).sum().item()
        loss = customLoss(o_img, y_img, o_std, y_std)
        loss.backward()
        optimizer.step()
        train_cost += loss.item()
    train_acc = train_correct/train_total if train_total else 0.0
    return train_cost, train_acc

def valid_loop(model, valid_loader):
    """Evaluate ``model`` on ``valid_loader`` without updating its weights.

    Args:
        model: network returning ``(image_logits, study_logits)``.
        valid_loader: iterable of ``x, (y_img, y_std)`` batches.

    Returns:
        (val_cost, val_acc): the SUM of the batch losses and the image-level
        accuracy at a 0.5 probability threshold (0.0 when the loader yields no
        images).
    """
    val_total = val_correct = val_cost = 0
    # Evaluate the model that was passed in (the original silently used the
    # global `seq`) on whatever device it lives on.
    device = next(model.parameters()).device
    model.eval()
    with torch.no_grad():
        for x_val, (y_val_img, y_val_std) in tqdm(valid_loader):
            x_val = x_val.to(device)
            y_val_img = y_val_img.to(device)
            y_val_std = y_val_std.to(device)
            (o_val_img, o_val_std) = model(x_val)
            # Count every image in the batch, not only those of the first sample.
            val_total += y_val_img.numel()
            val_correct += ((torch.sigmoid(o_val_img)>0.5) == (y_val_img>0.5)).sum().item()
            loss = customLoss(o_val_img, y_val_img, o_val_std, y_val_std)
            val_cost += loss.item()
    val_acc = val_correct/val_total if val_total else 0.0
    return val_cost, val_acc

def main_loop(n_epochs):
    """Train and validate the notebook-level model ``seq`` for ``n_epochs`` epochs.

    After every epoch the metrics are printed and the whole model object is
    saved under 'models/embedderModel/' with the epoch number and a timestamp
    in the file name.

    Args:
        n_epochs: number of epochs to run.
    """
    # Local import: `os` is not among the notebook's top-level imports.
    import os

    # Create the checkpoint directory up front; otherwise torch.save would fail
    # only after the first (long) epoch has already finished.
    os.makedirs('models/embedderModel', exist_ok=True)

    for epoch in range(n_epochs):
        print('epoch ' + str(epoch) + ':')
        # Both loops return costs summed over all batches (not averages), so
        # train and validation costs are not directly comparable in magnitude.
        train_totalCost, train_acc = train_loop(seq, trainEmbeddingsDataloader)
        val_totalCost, val_acc = valid_loop(seq, valEmbeddingsDataloader)

        print('train_cost: %.4f, train_acc: %.4f, val_cost: %.4f, val_acc: %.4f'
              % (train_totalCost, train_acc, val_totalCost, val_acc))
        datestring = datetime.now().strftime("%Y%m%d_%H%M")
        modelPath = 'models/embedderModel/ver03_epoch' + str(epoch) + '_' + datestring +'.pth'
        print('saving: ',modelPath)
        # Saves the full pickled module (not a state_dict); a later cell reloads
        # it with torch.load and relies on this format.
        torch.save(seq, modelPath)

In [18]:
# Train for 5 epochs; one checkpoint is written per epoch (see log below).
main_loop(5)

  0%|          | 0/5824 [00:00<?, ?it/s]

epoch 0:


100%|██████████| 5824/5824 [06:54<00:00, 14.06it/s]
100%|██████████| 1455/1455 [00:43<00:00, 33.39it/s]
  0%|          | 0/5824 [00:00<?, ?it/s]

train_cost: 1677.5659, train_acc: 0.9847, val_cost: 665.2918, val_acc: 0.9683
saving:  models/embedderModel/ver03_epoch0_20201006_2004.pth
epoch 1:


100%|██████████| 5824/5824 [06:52<00:00, 14.13it/s]
100%|██████████| 1455/1455 [00:43<00:00, 33.28it/s]
  0%|          | 0/5824 [00:00<?, ?it/s]

train_cost: 1025.8883, train_acc: 0.9886, val_cost: 709.2893, val_acc: 0.9693
saving:  models/embedderModel/ver03_epoch1_20201006_2012.pth
epoch 2:


100%|██████████| 5824/5824 [06:54<00:00, 14.05it/s]
100%|██████████| 1455/1455 [00:43<00:00, 33.08it/s]
  0%|          | 0/5824 [00:00<?, ?it/s]

train_cost: 974.4904, train_acc: 0.9887, val_cost: 711.4215, val_acc: 0.9690
saving:  models/embedderModel/ver03_epoch2_20201006_2020.pth
epoch 3:


100%|██████████| 5824/5824 [06:56<00:00, 13.99it/s]
100%|██████████| 1455/1455 [00:44<00:00, 32.62it/s]
  0%|          | 0/5824 [00:00<?, ?it/s]

train_cost: 943.8899, train_acc: 0.9889, val_cost: 714.6793, val_acc: 0.9690
saving:  models/embedderModel/ver03_epoch3_20201006_2027.pth
epoch 4:


100%|██████████| 5824/5824 [07:05<00:00, 13.67it/s]
100%|██████████| 1455/1455 [00:45<00:00, 31.93it/s]

train_cost: 916.3992, train_acc: 0.9890, val_cost: 765.2495, val_acc: 0.9699
saving:  models/embedderModel/ver03_epoch4_20201006_2035.pth





In [19]:
# Reload the epoch-0 checkpoint, which has the lowest val_cost in the log above.
# The file holds the whole pickled model object, so the BiGRU class must be
# defined in this kernel before loading, and the file must come from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled modules — confirm against the installed version.
seq = torch.load('models/embedderModel/ver03_epoch0_20201006_2004.pth')

In [20]:
# Manual iterator over validation studies; each next(iterVal) in the cells
# below consumes one study, so re-running those cells shows a different study.
iterVal = iter(valEmbeddingsDataloader)

In [22]:
# Sanity check (image level): for the next validation study, print each image's
# predicted probability next to its ground-truth label.
seq.eval()
with torch.no_grad():
    x,(y_img, _) = next(iterVal)
    x=x.cuda()
    o_img, _ = seq(x)
    # Convert logits to probabilities.
    pred = torch.sigmoid(o_img)
    # Index 0 selects the single study in the batch (the loader uses batch_size=1).
    for eachIndex in range(pred.size(1)):
        print((pred[0,eachIndex]).type(torch.float).item(), y_img[0, eachIndex].item())

0.0005670030368492007 0.0
0.00040258560329675674 0.0
0.0003917957947123796 0.0
0.0021916909608989954 0.0
0.0003100456960964948 0.0
0.0005813196185044944 0.0
0.0003179519553668797 0.0
0.002153890673071146 0.0
0.00019555141625460237 0.0
0.003845823463052511 0.0
0.0007086272817105055 0.0
0.0009180671186186373 0.0
0.0049347179010510445 0.0
0.0011335922172293067 0.0
0.0010367324575781822 0.0
0.0015702798264101148 0.0
0.002002938650548458 0.0
0.0035155199002474546 0.0
0.00043216324411332607 0.0
0.0029307182412594557 0.0
0.004422558471560478 0.0
0.004740583244711161 0.0
0.01259460486471653 0.0
0.001973669044673443 0.0
0.002283178037032485 0.0
0.004398834891617298 1.0
0.048127029091119766 1.0
0.034608203917741776 1.0
0.023382125422358513 0.0
0.008209526538848877 0.0
0.07961971312761307 0.0
0.06172666326165199 1.0
0.3646923899650574 1.0
0.2665514051914215 1.0
0.1898738592863083 0.0
0.25640901923179626 0.0
0.17503346502780914 0.0
0.3131520748138428 0.0
0.16905435919761658 0.0
0.07973140478134155

In [49]:
# Sanity check (study level): print the nine predicted probabilities of the next
# validation study followed by its ground-truth study-level labels.
# NOTE(review): this cell does not call seq.eval() itself; it relies on the
# model mode left behind by previously executed cells.
with torch.no_grad():
    x,(_, y_std) = next(iterVal)
    x=x.cuda()
    _, o_std = seq(x)
    # Convert logits to probabilities.
    pred = torch.sigmoid(o_std)
    print(((pred[0]).type(torch.float).cpu().numpy()))
    print(y_std[0].cpu().numpy())

[0.07530528 0.02516204 0.10859079 0.05838068 0.12206057 0.6902007
 0.8465707  0.18094982 0.5447287 ]
[0. 0. 0. 0. 0. 1. 1. 0. 1.]
