In [1]:
import numpy as np, pandas as pd, os
import matplotlib.pyplot as plt
import glob
import datetime
import torch
import torchvision.transforms as transforms
from torch import nn
from torch.nn import functional as F
import pydicom
from pydicom import dcmread
from tqdm import tqdm

# Wall-clock start of the run.
startTime = datetime.datetime.now()

# The code in this cell is adapted from the public notebook by Quadcore/Richard Epstein.
# It installs GDCM without Internet access.
# GDCM is needed to read some compressed DICOM images.
# If a notebook has already hit the GDCM error, the kernel must be restarted before
# the files can be read, even after the GDCM software has been installed.
# Note that GDCM is never imported directly ("import gdcm"); importing pydicom is enough.
# The dataset (gdcm-conda-install) was provided by Ronaldo S.A. Batista. Definitely deserves an upvote!

!cp ../input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2

print("GDCM installed.")

# pydicom is imported again here, after the GDCM installation above
# (it was already imported at the top of this cell).
import pydicom

In [2]:
# Read the test manifest with all three UID columns as pandas strings and
# index it by image (SOPInstanceUID) so single images can be looked up by ID.
testDataDF = (
    pd.read_csv(
        'test.csv',
        dtype=dict.fromkeys(['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID'], 'string'),
    )
    .set_index('SOPInstanceUID')
)
testDataDF.head()

Unnamed: 0_level_0,StudyInstanceUID,SeriesInstanceUID
SOPInstanceUID,Unnamed: 1_level_1,Unnamed: 2_level_1
c3163725fcf6,df06fad17bc3,857e3d760445
d54a8daaf836,df06fad17bc3,857e3d760445
bdc531b699cd,df06fad17bc3,857e3d760445
9e6a68e27df0,df06fad17bc3,857e3d760445
25e3307ba7da,df06fad17bc3,857e3d760445


In [3]:
# One entry per distinct study; the inference cell iterates over these.
listOfStudyID = testDataDF.loc[:, 'StudyInstanceUID'].unique()
print(len(listOfStudyID))

650


In [4]:
# Helper functions

def window(img, WL=50, WW=350):
    """Clip an image to an intensity window and rescale it to uint8.

    Parameters
    ----------
    img : array-like
        Input intensities.
    WL : number
        Window level (centre of the window).
    WW : number
        Window width; the window spans WL - WW//2 .. WL + WW//2.

    Returns
    -------
    numpy.ndarray
        uint8 array with the same shape as ``img``.  The clipped values are
        shifted so their minimum is 0 and divided by their maximum, i.e. the
        scaling uses the range actually present in the clipped image, not the
        nominal window bounds.
    """
    upper, lower = WL + WW // 2, WL - WW // 2
    # np.clip already returns a new array, so the input is never modified.
    X = np.clip(img, lower, upper)
    X = X - np.min(X)
    peak = np.max(X)
    if peak == 0:
        # Constant image after clipping (e.g. everything outside the window):
        # dividing would give 0/0 = NaN and an undefined NaN -> uint8 cast.
        return np.zeros(X.shape, dtype='uint8')
    X = X / peak
    return (X * 255.0).astype('uint8')

# Per-image preprocessing applied before the CNN embedder: convert the
# 3-channel image to a tensor, then normalise each channel with a fixed
# mean and standard deviation.
# NOTE(review): the mean/std values look like the usual ImageNet statistics —
# confirm they match what the CNN was trained with.
# NOTE(review): the inference cell casts images to float32 (values 0..255)
# before calling this transform; torchvision's ToTensor only rescales uint8
# input to [0, 1], so the normalisation here is applied to unscaled values —
# confirm this matches the training pipeline before changing anything.
data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

# Helper functions for inference
def dcmDataToImage(dcmData):
    """Convert a DICOM dataset into a 3-channel uint8 image.

    The stored pixel values are rescaled with the dataset's RescaleSlope and
    RescaleIntercept, then rendered through three intensity windows
    (WL/WW = -600/1500, 40/400 and 100/700) that become the three channels.

    Parameters
    ----------
    dcmData : pydicom dataset
        Must provide ``pixel_array``, ``RescaleSlope`` and ``RescaleIntercept``.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (rows, cols, 3) for a 2-D ``pixel_array``.
    """
    # Slope and intercept are decimal values: int() would truncate a
    # fractional slope (e.g. 0.5 -> 0).  Using floats also keeps the
    # arithmetic out of the (possibly unsigned) integer dtype of pixel_array.
    slope = float(dcmData.RescaleSlope)
    intercept = float(dcmData.RescaleIntercept)
    rescaled = dcmData.pixel_array * slope + intercept

    channels = [
        window(rescaled, WL=-600, WW=1500),
        window(rescaled, WL=40, WW=400),
        window(rescaled, WL=100, WW=700),
    ]
    return np.stack(channels, 2)

def chunks(lst, n):
    """Lazily split lst into consecutive slices of at most n items each.

    The last slice is shorter when len(lst) is not a multiple of n; an empty
    lst yields nothing.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

def sortByImgPosHelper(dcmDataDict):
    """Sort key: return the 'img_pos' entry of one slice record."""
    slicePosition = dcmDataDict['img_pos']
    return slicePosition

In [5]:
# Hyper-parameters for the BiGRU sequence model defined below.
# NOTE(review): these constants are not referenced in the visible cells (the
# models are loaded from disk) — presumably they mirror the training
# configuration; confirm before relying on them.
INPUT_SIZE = 64
HIDDEN_SIZE = 32
NUM_LAYERS = 1
NUM_CLASSES = 1

class BiGRU(nn.Module):
    """Bidirectional GRU over a sequence of feature vectors with two heads.

    * Image-level head (``linear1`` -> ReLU -> ``linear2``): applied to the GRU
      output of every time step, producing ``num_classes`` logits per step.
    * Study-level head (``linear3`` -> ReLU -> ``linear4``): applied to the
      final hidden state of both directions, producing 9 logits per sequence.

    Parameters
    ----------
    input_size : int
        Size of each input feature vector.
    hidden_size : int
        GRU hidden size per direction.
    num_layers : int
        Number of stacked GRU layers.
    num_classes : int
        Number of image-level outputs per time step.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.GRU = nn.GRU(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        # Image-level head (input is forward + backward output, hence * 2).
        self.linear1 = nn.Linear(hidden_size*2, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)
        # Study-level head.
        self.linear3 = nn.Linear(hidden_size*2, hidden_size)
        self.linear4 = nn.Linear(hidden_size, 9)

    def forward(self, x):
        """Return ``(imageLevelOutputs, studyLevelOutputs)`` logits.

        x is ``(batch, seq_len, input_size)``.  ``imageLevelOutputs`` is
        ``(batch, seq_len, num_classes)`` with the last axis squeezed away when
        ``num_classes == 1``; ``studyLevelOutputs`` is ``(batch, 9)``.
        """
        # With no initial state given, nn.GRU starts from zeros on the same
        # device/dtype as x, so the module is no longer tied to CUDA.
        out, h_n = self.GRU(x)

        # nn.Linear acts on the last axis, so every time step is handled in one
        # batched call instead of a Python loop over the sequence.
        imageLevelOutputs = self.linear2(F.relu(self.linear1(out))).squeeze(2)

        # h_n is (num_layers * 2, batch, hidden); its last two entries are the
        # final layer's forward and backward states.  For batch size 1 and a
        # single layer this equals the previous ``h_n.view(1, -1)``, but it
        # also works for larger batches and stacked layers.
        finalHidden = torch.cat([h_n[-2], h_n[-1]], dim=1)
        studyLevelOutputs = self.linear4(F.relu(self.linear3(finalHidden)))

        return (imageLevelOutputs, studyLevelOutputs)

In [6]:
# Load the two serialized models and switch both to evaluation mode.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source.
# NOTE(review): no map_location is given, so tensors are restored to the
# device they were saved from; the inference cell moves its inputs with
# .cuda(), so the models are presumably CUDA-resident — confirm.
CNNmodel_Path = 'models/CNNmodel/CNNmodel_01_epoch1_CV4_20201008_2252_embedder.pth' 
CNNembedderModel = torch.load(CNNmodel_Path) 
CNNembedderModel.eval();

# NOTE(review): presumably a pickled BiGRU instance, in which case the BiGRU
# class must be defined before this line runs — confirm.
RNNmodel_Path = 'models/embedderModel/CNNmodel_01_epoch1_CV4_20201008_2252_sequence3_20201012_1953.pth'
RNNmodel = torch.load(RNNmodel_Path) 
RNNmodel.eval();

In [7]:
# Study level labels
listOfMetricLabels = ['_negative_exam_for_pe', '_indeterminate',
                        '_chronic_pe', '_acute_and_chronic_pe',
                        '_central_pe', '_leftsided_pe', '_rightsided_pe',
                        '_rv_lv_ratio_gte_1', '_rv_lv_ratio_lt_1']

defaultScore = {'_pe_present_on_image': 0.053915069524414806,
                 '_negative_exam_for_pe': 0.6763928618101033,
                 '_rv_lv_ratio_gte_1': 0.12875001256566257,
                 '_rv_lv_ratio_lt_1': 0.17437230326919448,
                 '_leftsided_pe': 0.21089872969528548,
                 '_chronic_pe': 0.040139752506710064,
                 '_rightsided_pe': 0.2575653665766779,
                 '_acute_and_chronic_pe': 0.019458347341720122,
                 '_central_pe': 0.054468517151291695,
                 '_indeterminate': 0.020484822355039723}

In [8]:
def _predictStudyRows(studyID, studyDF):
    """Run both models on one study and return its submission rows (no newlines).

    studyID -- StudyInstanceUID of the study.
    studyDF -- the rows of testDataDF for this study, indexed by SOPInstanceUID.

    Returns the image-level rows (ordered by descending slice position)
    followed by the study-level rows (ordered as in listOfMetricLabels).
    Any read or model error propagates to the caller, so a partial result is
    never returned.
    """
    # Read every slice of the study together with the value used to order it.
    sliceRecords = []
    for imageID, seriesID in studyDF['SeriesInstanceUID'].items():
        imagePath = 'data/test/' + studyID + '/' + seriesID + '/' + imageID + '.dcm'
        dcm_data = dcmread(imagePath)
        # DICOM element (0020,0032); its third component is the sort key.
        img_pos = dcm_data[0x20, 0x32].value[2]
        sliceRecords.append({'imageID': imageID, 'dcm_data': dcm_data, 'img_pos': img_pos})
    sliceRecords.sort(key=sortByImgPosHelper, reverse=True)

    # Embed the slices 36 at a time.
    embeddingList = []
    for eachChunk in chunks(sliceRecords, 36):
        listOfTensors = [
            data_transform(dcmDataToImage(eachRecord['dcm_data']).astype(np.float32))
            for eachRecord in eachChunk
        ]
        stackedImagesTensor = torch.stack(listOfTensors, dim=0).cuda()
        embeddingList.append(CNNembedderModel(stackedImagesTensor))

    # The sequence model takes the whole study as a single batch entry.
    embeddingVol = torch.cat(embeddingList, dim=0).unsqueeze(0)
    imageLevelLabels, studyLevelLabels = RNNmodel(embeddingVol)
    imageLevelLabels = torch.sigmoid(imageLevelLabels).squeeze(0).cpu().detach().numpy()
    studyLevelLabels = torch.sigmoid(studyLevelLabels).squeeze(0).cpu().detach().numpy()

    rows = [
        eachRecord['imageID'] + ',' + str(imageLevelLabels[eachIndex])
        for eachIndex, eachRecord in enumerate(sliceRecords)
    ]
    rows += [
        studyID + eachMetric + ',' + str(studyLevelLabels[eachIndex])
        for eachIndex, eachMetric in enumerate(listOfMetricLabels)
    ]
    return rows


def _defaultStudyRows(studyID, imageIDs):
    """Return fallback submission rows for a study, built from defaultScore."""
    rows = [
        eachImageID + ',' + str(defaultScore['_pe_present_on_image'])
        for eachImageID in imageIDs
    ]
    rows += [
        studyID + eachMetric + ',' + str(defaultScore[eachMetric])
        for eachMetric in listOfMetricLabels
    ]
    return rows


# The context manager closes (and flushes) the submission file even if the
# loop is interrupted.
with open('submissionNew02.csv', 'w') as f, torch.no_grad():
    f.write('id,label\n')

    for eachStudyID in tqdm(listOfStudyID):
        thisStudyDF = testDataDF[testDataDF['StudyInstanceUID'] == eachStudyID]

        try:
            studyRows = _predictStudyRows(eachStudyID, thisStudyDF)
        except Exception as err:
            # Best-effort fallback, but report it: a systematic failure (bad
            # path, out-of-memory, ...) would otherwise silently turn the whole
            # submission into default scores.  KeyboardInterrupt is not caught.
            tqdm.write('Study ' + eachStudyID + ' failed (' + repr(err) + '); writing default scores')
            studyRows = _defaultStudyRows(eachStudyID, thisStudyDF.index)

        # Rows are written only once the study is fully resolved, so an id can
        # never appear twice (once predicted, once as a default).
        f.write('\n'.join(studyRows) + '\n')

print('finish')

100%|██████████| 650/650 [32:20<00:00,  2.99s/it]

finish



