In [38]:
# PyTorch Modules
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import torchvision
from torchvision import models
from torchvision import transforms
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Other non-PyTorch Modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image
import time
from datetime import datetime
import pickle
import json
import random

import pydicom
from pydicom import dcmread

In [10]:
# Load the test manifest with the three UID columns read as pandas 'string'
# dtype, then key the rows by SOPInstanceUID (used below as the image id).
testDataDF = pd.read_csv(
    'test.csv',
    dtype={
        'StudyInstanceUID': 'string',
        'SeriesInstanceUID': 'string',
        'SOPInstanceUID': 'string',
    },
).set_index('SOPInstanceUID')

In [20]:
# Distinct study identifiers found in the manifest (one entry per study).
listOfStudyIDs = testDataDF.loc[:, 'StudyInstanceUID'].unique()
print(listOfStudyIDs)

<StringArray>
['df06fad17bc3', 'c8039e7f9e63', '761f6f1a9f5b', 'c8db5b1f6b56',
 '462e805da1f1', '7f6fb39566ed', 'b44cbf5371f2', '62dfc5f411e8',
 '1870d65d0f6a', '26135e3b3b30',
 ...
 'f84e2070f8fa', 'f7d667111876', 'e47f788d8c1c', '45ccc170506e',
 '9a98258fc668', '66fb5816ac72', '4ec6107512ec', '1c30b47fddbe',
 'daa9f40349cf', '84a57a6bc1b4']
Length: 650, dtype: string


In [35]:
def window(img, WL=50, WW=350):
    """Apply an intensity window to ``img`` and rescale the result to uint8.

    Values are clipped to ``[WL - WW//2, WL + WW//2]``, shifted so the smallest
    clipped value becomes 0, divided by the largest shifted value and scaled to
    0..255. The scaling uses the min/max actually present after clipping, not
    the window bounds themselves.

    Args:
        img: numpy array of intensities; it is not modified.
        WL: window level (centre of the window).
        WW: window width; the half-width is ``WW // 2``.

    Returns:
        A uint8 array with the same shape as ``img``. An input that is
        constant after clipping yields all zeros.
    """
    upper, lower = WL + WW // 2, WL - WW // 2
    # np.clip returns a new array, so no defensive copy of the input is needed.
    clipped = np.clip(img, lower, upper)
    shifted = clipped - np.min(clipped)
    peak = np.max(shifted)
    if peak == 0:
        # Flat slice after clipping: dividing would be 0/0 -> NaN, and casting
        # NaN to uint8 is undefined. Return a well-defined all-zero image.
        return np.zeros(np.shape(shifted), dtype='uint8')
    return (shifted / peak * 255.0).astype('uint8')

# Tensor conversion followed by per-channel normalisation with fixed mean/std
# (the values match the commonly used ImageNet statistics).
# NOTE(review): per the torchvision docs, ToTensor only rescales to [0, 1] for
# uint8 arrays / PIL images; other dtypes pass through unscaled. Confirm the
# dtype fed in at inference matches what the model was trained with.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
])

In [53]:
# Sequence-model hyper-parameters. They agree with the layer sizes printed for
# the loaded checkpoint: GRU(64, 32), one layer, one image-level logit.
INPUT_SIZE = 64
HIDDEN_SIZE = 32
NUM_LAYERS = 1
NUM_CLASSES = 1

class BiGRU(nn.Module):
    """Bidirectional GRU with a per-timestep head and a whole-sequence head.

    Input is ``(batch, seq_len, input_size)`` (the GRU is ``batch_first``).

    * Image-level head: ``linear1 -> ReLU -> dropout -> linear2`` applied to
      the GRU output at every timestep.
    * Study-level head: ``linear3 -> ReLU -> dropout -> linear4`` applied to
      the final forward and backward hidden states of the last GRU layer,
      producing 9 logits per sequence.

    Attribute names are kept exactly as before because whole-module
    checkpoints are restored with ``torch.load``; ``forward`` therefore only
    relies on attributes that already exist on previously saved instances.

    Args:
        input_size: feature size of each timestep.
        hidden_size: GRU hidden size per direction.
        num_layers: number of stacked GRU layers.
        num_classes: number of image-level logits per timestep.
        dropout: drop probability used in both heads (training mode only).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout = 0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.GRU = nn.GRU(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        # Stored as a plain probability and applied functionally in forward().
        self.dropout = dropout
        self.linear1 = nn.Linear(hidden_size*2, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)
        self.linear3 = nn.Linear(hidden_size*2, hidden_size)
        self.linear4 = nn.Linear(hidden_size, 9)

    def forward(self, x):
        """Run the model on a batch of sequences.

        Args:
            x: tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            ``(imageLevelOutputs, studyLevelOutputs)``: logits of shape
            ``(batch, seq_len)`` when ``num_classes == 1`` (otherwise
            ``(batch, seq_len, num_classes)``) and ``(batch, 9)``.
        """
        batch_size = x.size(0)
        # Create the initial state on the input's device/dtype instead of
        # forcing CUDA, so the module also runs on CPU.
        h0 = torch.zeros(self.num_layers * 2, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)

        out, h_n = self.GRU(x, h0)

        # nn.Linear acts on the last dimension, so every timestep can go
        # through the image-level head in one call instead of a Python loop.
        imageHidden = F.relu(self.linear1(out))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping activations even after .eval().
        imageHidden = F.dropout(imageHidden, p=self.dropout, training=self.training)
        imageLevelOutputs = self.linear2(imageHidden).squeeze(2)

        # h_n is (num_layers * 2, batch, hidden). Separate layers/directions
        # and concatenate the last layer's forward and backward states per
        # sample. For batch == 1 and num_layers == 1 this equals the previous
        # h_n.view(1, -1), but it also works for larger batches / more layers.
        lastLayer = h_n.reshape(self.num_layers, 2, batch_size, self.hidden_size)[-1]
        studyFeatures = torch.cat([lastLayer[0], lastLayer[1]], dim=1)
        studyLevelOutputs = F.relu(self.linear3(studyFeatures))
        studyLevelOutputs = F.dropout(studyLevelOutputs, p=self.dropout, training=self.training)
        studyLevelOutputs = self.linear4(studyLevelOutputs)

        return (imageLevelOutputs, studyLevelOutputs)

In [54]:
# Checkpoint locations for the two inference stages.
CNN_EMBEDDER_CHECKPOINT = 'models/CNNmodel/CNNmodel_01_epoch1_20201005_1533_embedder.pth'
SEQUENCE_MODEL_CHECKPOINT = 'models/embedderModel/ver03_epoch0_20201006_2004.pth'

# NOTE(security): torch.load un-pickles whole objects here, which can execute
# arbitrary code; only load checkpoints from a trusted source.

# Per-image network; it is called on one slice at a time to get an embedding.
model = torch.load(CNN_EMBEDDER_CHECKPOINT)
model.eval()

# Sequence model run over a study's embeddings (a BiGRU, see the cell output).
seq = torch.load(SEQUENCE_MODEL_CHECKPOINT)
seq.eval()

BiGRU(
  (GRU): GRU(64, 32, batch_first=True, bidirectional=True)
  (linear1): Linear(in_features=64, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=1, bias=True)
  (linear3): Linear(in_features=64, out_features=32, bias=True)
  (linear4): Linear(in_features=32, out_features=9, bias=True)
)

In [64]:
# Fixed per-label scores keyed by label suffix. Not referenced by the other
# cells shown here -- presumably fallback values / label priors; confirm.
defaultScore = {
    '_pe_present_on_image': 0.053915069524414806,
    '_negative_exam_for_pe': 0.6763928618101033,
    '_rv_lv_ratio_gte_1': 0.12875001256566257,
    '_rv_lv_ratio_lt_1': 0.17437230326919448,
    '_leftsided_pe': 0.21089872969528548,
    '_chronic_pe': 0.040139752506710064,
    '_rightsided_pe': 0.2575653665766779,
    '_acute_and_chronic_pe': 0.019458347341720122,
    '_central_pe': 0.054468517151291695,
    '_indeterminate': 0.020484822355039723,
}

In [71]:
def sortByImgPosHelper(EmbeddingDict):
    """Sort key: return the ``'img_pos'`` entry of one per-image record.

    Raises:
        KeyError: if the record has no ``'img_pos'`` key.
    """
    imagePosition = EmbeddingDict['img_pos']
    return imagePosition

def _dicomToModelInput(dcm_data):
    """Turn one DICOM slice into the 3-channel tensor given to the CNN embedder.

    The stored pixels are rescaled with the file's RescaleSlope and
    RescaleIntercept, rendered through three intensity windows (one per
    channel), cast to float32 and passed through ``data_transform``.
    """
    # float() rather than int(): a fractional slope/intercept must not be
    # truncated, and the float product keeps the arithmetic out of the stored
    # integer pixel dtype. For integer-valued slope/intercept the windowed
    # result is unchanged.
    rescaled = dcm_data.pixel_array * float(dcm_data.RescaleSlope) + float(dcm_data.RescaleIntercept)
    channels = np.stack([window(rescaled, WL=-600, WW=1500),
                         window(rescaled, WL=40, WW=400),
                         window(rescaled, WL=100, WW=700)], 2)
    # NOTE(review): window() yields 0..255 values and the float32 cast keeps
    # that range; ToTensor does not rescale float arrays, so Normalize is
    # applied to 0..255 inputs. Left as-is on the assumption that training used
    # the same pipeline -- confirm.
    return data_transform(channels.astype(np.float32))


# Study-level label suffixes. The i-th study output of the sequence model is
# written under the i-th suffix (order assumed to match training -- confirm).
listOfMetricLabels = ['_negative_exam_for_pe', '_indeterminate', '_chronic_pe', '_acute_and_chronic_pe', '_central_pe', '_leftsided_pe', '_rightsided_pe', '_rv_lv_ratio_gte_1', '_rv_lv_ratio_lt_1']

# `with` closes the submission file even if a study fails part-way through;
# no_grad() avoids building an autograd graph for every slice during inference.
with open('submissionWithStudyLabels.csv', 'w') as f, torch.no_grad():
    f.write('id,label\n')

    for eachStudyID in tqdm(listOfStudyIDs):
        eachStudyDF = testDataDF[testDataDF['StudyInstanceUID']==eachStudyID]

        # One record per slice: image id, z-position and CNN embedding.
        listOfEmbeddingDict = []
        for eachImageID in eachStudyDF.index.to_list():
            thisImagePath = 'data/test/'+eachStudyID+'/'+eachStudyDF.loc[eachImageID,'SeriesInstanceUID']+'/'+eachImageID+'.dcm'
            # dcmread is the current name of the deprecated pydicom.read_file.
            dcm_data = dcmread(thisImagePath)

            toPred = _dicomToModelInput(dcm_data).cuda().unsqueeze(0)
            embedding = model(toPred).detach().cpu().numpy()[0]

            # Third component of tag (0020,0032), used to order the slices.
            img_pos = dcm_data[0x20, 0x32].value[2]
            listOfEmbeddingDict.append({'imageID':eachImageID, 'img_pos':img_pos, 'embedding':embedding})

        # Order slices by descending position before running the sequence model.
        listOfEmbeddingDict.sort(key=sortByImgPosHelper, reverse=True)
        embeddingVolume = np.array([eachEmbeddingDict['embedding'] for eachEmbeddingDict in listOfEmbeddingDict])
        embeddingVolume = torch.tensor(embeddingVolume).unsqueeze(0).cuda()
        o_img, o_std = seq(embeddingVolume)
        pred_img = torch.sigmoid(o_img).squeeze(0).cpu().detach().numpy()
        pred_std = torch.sigmoid(o_std).squeeze(0).cpu().detach().numpy()

        # Image-level rows, in the same (sorted) order as the predictions.
        for eachIndex, eachEmbeddingDict in enumerate(listOfEmbeddingDict):
            f.write(eachEmbeddingDict['imageID']+','+str(pred_img[eachIndex])+'\n')

        # Study level labels
        for eachIndex, eachMetric in enumerate(listOfMetricLabels):
            f.write(eachStudyID+eachMetric+','+str(pred_std[eachIndex])+'\n')

100%|██████████| 650/650 [1:06:40<00:00,  6.15s/it]


In [None]:
# Sanity check: for the first sequence of one batch, print each image-level
# sigmoid output next to the corresponding entry of y_img (presumably the
# target label -- confirm).
# NOTE(review): `iterVal` is not defined in any cell shown here, so this cell
# fails on a fresh kernel unless that iterator is created first.
seq.eval()
with torch.no_grad():
    x, (y_img, _) = next(iterVal)
    x = x.cuda()
    o_img, _ = seq(x)
    pred = torch.sigmoid(o_img)
    for eachIndex in range(pred.size(1)):
        predicted = pred[0, eachIndex].type(torch.float).item()
        expected = y_img[0, eachIndex].item()
        print(predicted, expected)