In [1]:
# PyTorch Modules
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import torchvision
from torchvision import models
from torchvision import transforms
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Other non-PyTorch Modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image
import time
from datetime import datetime
import pickle
import json
import random

In [2]:
torch.cuda.empty_cache()

In [3]:
now = datetime.now()
DATESTRING = now.strftime("%Y%m%d_%H%M")
print(DATESTRING)

20201004_0132


In [4]:
dataDF = pd.read_csv('data_fold.csv')
dataDF = dataDF.set_index('SOPInstanceUID')

In [5]:
dataDF.head()

Unnamed: 0_level_0,StudyInstanceUID,SeriesInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,...,ma,exposure,img_pos,conv_kernel,patient_position,pixel_spacing,bits_stored,high_bit,img_count,fold
SOPInstanceUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
c0f3cb036d06,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,...,842,108,-234.5,B30f,HFS,"[0.5234375, 0.5234375]",12,11,124,3
f57ffd3883b6,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,...,966,124,-252.5,B30f,HFS,"[0.5234375, 0.5234375]",12,11,124,3
41220fda34a3,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,...,596,76,-432.5,B30f,HFS,"[0.5234375, 0.5234375]",12,11,124,3
13b685b4b14f,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,...,599,76,-434.5,B30f,HFS,"[0.5234375, 0.5234375]",12,11,124,3
be0b7524ffb4,6897fa9de148,2bfbb7fd2e8b,0,0,0,0,0,0,1,1,...,603,77,-436.5,B30f,HFS,"[0.5234375, 0.5234375]",12,11,124,3


In [6]:
trainDF = dataDF[dataDF['fold']!=4]
valDF = dataDF[dataDF['fold']==4]

In [7]:
embeddingDirPath = 'data/embeddings/expt11/'

In [8]:
class embeddingsDataset(Dataset):
    """create sample dataset to work with"""

    def __init__(self, dataDF = None, listOfStudies = None):
        self.dataDF = dataDF
        self.listOfStudies = listOfStudies

    def __len__(self):
        return len(self.listOfStudies)

    def __getitem__(self, idx):
        embedDict = pickle.load(open(embeddingDirPath+self.listOfStudies[idx]+'.p', 'rb'))
        embeddingVolume = np.array(embedDict['embeddings'])
        listOfImages = embedDict['ids']
        labels = [self.dataDF.loc[eachImageID, 'pe_present_on_image']for eachImageID in listOfImages]
        labels = np.array(labels).astype(np.float32)
        return embeddingVolume, labels

In [9]:
trainEmbeddingsDataset = embeddingsDataset(dataDF=dataDF, listOfStudies=trainDF['StudyInstanceUID'].unique())
trainEmbeddingsDataloader = DataLoader(trainEmbeddingsDataset, batch_size=1, shuffle=True, num_workers=1)

valEmbeddingsDataset = embeddingsDataset(dataDF=dataDF, listOfStudies=valDF['StudyInstanceUID'].unique())
valEmbeddingsDataloader = DataLoader(valEmbeddingsDataset, batch_size=1, shuffle=True, num_workers=1)

In [10]:
INPUT_SIZE = 64
HIDDEN_SIZE = 32
NUM_LAYERS = 1
NUM_CLASSES = 1

class BiGRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.GRU = nn.GRU(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        self.linear1 = nn.Linear(hidden_size*2, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        outputs = []
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).cuda()
        #c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).cuda()

        out, _ = self.GRU(x, h0)
        
        for i, out_t in enumerate(out.chunk(out.size(1), dim=1)):
            out_t = out_t.squeeze(1)
            out_t = F.relu(self.linear1(out_t))
            out_t = self.linear2(out_t)
            outputs += [out_t]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs

In [None]:
INPUT_SIZE = 64
HIDDEN_SIZE = 32
NUM_LAYERS = 1
NUM_CLASSES = 1

class BRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
        )
        self.linear1 = nn.Linear(hidden_size*2, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        outputs = []
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).cuda()
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).cuda()

        out, _ = self.lstm(x, (h0, c0))
        
        for i, out_t in enumerate(out.chunk(out.size(1), dim=1)):
            out_t = out_t.squeeze(1)
            out_t = F.relu(self.linear1(out_t))
            out_t = self.linear2(out_t)
            outputs += [out_t]
        outputs = torch.stack(outputs, 1).squeeze(2)
        print(outputs.size())
        return outputs

In [None]:
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence, self).__init__()
        self.gru1 = nn.GRUCell(64, 32)
        self.linear1 = nn.Linear(32, 16)
        self.linear2 = nn.Linear(16, 1)

    def forward(self, input):
        outputs = []
        h_t = torch.zeros(input.size(0), 32, dtype=torch.float).cuda()
        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            input_t = input_t.squeeze(1)
            h_t = self.gru1(input_t, h_t)
            output = F.relu(self.linear1(h_t))
            output = self.linear2(output)
            outputs += [output]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs

In [11]:
seq = BiGRU(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, NUM_CLASSES).cuda()

In [12]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(3.0))

optimizer = optim.Adam(seq.parameters(), lr=3e-4)

In [13]:
def train_loop(model, train_loader):
    train_total = train_correct = train_cost = 0
    seq.train()
    for x,y in tqdm(train_loader):
        x=x.cuda()
        y=y.cuda()
        optimizer.zero_grad()
        o = seq(x)
        train_total += y.size(1)
        train_correct += ((torch.sigmoid(o[0,:])>0.5) == (y[0,:]>0.5)).sum().item()
        loss = criterion(o, y)
        loss.backward()
        optimizer.step()
        train_cost += loss.item()
    return train_cost/train_total, train_correct/train_total

def valid_loop(model, valid_loader):
    # Evaluate on validation  data 
    val_total = val_correct = val_cost = 0
    model.eval()
    with torch.no_grad():
        for x_val, y_val in tqdm(valid_loader):
            x_val = x_val.cuda()
            y_val = y_val.cuda()
            o = seq(x_val)
            val_total += y_val.size(1)
            val_correct += ((torch.sigmoid(o[0,:])>0.5) == (y_val[0,:]>0.5)).sum().item()
            loss = criterion(o, y_val)
            val_cost += loss.item()
    return val_cost/val_total, val_correct/val_total

def main_loop(n_epochs):
    for epoch in range(n_epochs):
        print('epoch ' + str(epoch) + ':')
        train_avgCost, train_acc = train_loop(seq, trainEmbeddingsDataloader)
        val_avgCost, val_acc = valid_loop(seq, valEmbeddingsDataloader)
        print('train_cost: %.4f, train_acc: %.4f, val_cost: %.4f, val_acc: %.4f'\
              % (train_avgCost, train_acc, val_avgCost, val_acc))
        modelPath = 'models/embedderModel/ver01_epoch' + str(epoch) + '_' + DATESTRING +'.pth'
        print('saving: ',modelPath)
        torch.save(seq, modelPath)

In [14]:
main_loop(1)

  0%|          | 0/5824 [00:00<?, ?it/s]

epoch 0:


100%|██████████| 5824/5824 [05:55<00:00, 16.38it/s]
100%|██████████| 1455/1455 [00:35<00:00, 41.01it/s]

train_cost: 0.0004, train_acc: 0.9822, val_cost: 0.0009, val_acc: 0.9707
saving:  models/embedderModel/ver01_epoch0_20201004_0132.pth





In [1]:
iterVal = iter(valEmbeddingsDataloader)
# Sanity Check
seq.eval()
with torch.no_grad():
    x,y = next(iterVal)
    x=x.cuda()
    y=y.cuda()
    o = seq(x)
    pred = torch.sigmoid(o)
    for eachIndex in range(pred.size(1)):
        print((pred[0,eachIndex]).type(torch.float).item(), y[0, eachIndex].item())

NameError: name 'valEmbeddingsDataloader' is not defined