## Setup

In [8]:
# `display` and `HTML` are part of the public `IPython.display` API; importing
# them from `IPython.core.display` is deprecated (IPython >= 7.14 warns about it).
from IPython.display import display, HTML

# Let the notebook container use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot
import matplotlib.pyplot as plt
import random

import pydicom
from PIL import Image

import glob
from pathlib import Path

import torch
import torch.optim as optim
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from skimage.segmentation import mark_boundaries

from lime import lime_image

import gc

In [3]:
# Number of CPU threads PyTorch may use for intra-op parallelism.
NUM_TORCH_THREADS = 80
torch.set_num_threads(NUM_TORCH_THREADS)

In [4]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Report the GPU that `torch.device("cuda")` resolves to (the current CUDA
# device). The original queried the hard-coded index 1, which raises on
# machines with fewer than two GPUs or without CUDA.
gpu_name = (
    torch.cuda.get_device_name(torch.cuda.current_device())
    if torch.cuda.is_available()
    else "cpu (CUDA not available)"
)
gpu_name

'TITAN RTX'

## Data preparation


In [6]:
# Verdict columns that make a case count as positive.
pos_labels = [
    "Pulmonary tumour",
    "Pulmonary metastasis",
]

In [7]:
# Read the verdict table and the statistics table; missing cells become 0.
verdict_df = pd.read_csv('/mnt/idms/PROJECTS/Lung/Verdicts.csv').fillna(0)
statistics_df = pd.read_csv('/mnt/idms/PROJECTS/Lung/Statistics.csv').fillna(0)

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/idms/PROJECTS/Lung/Verdicts.csv'

In [None]:
# Class sizes. The positive count is read from the last row of the statistics
# table (presumably a totals row -- TODO confirm against the CSV).
total_cases = len(verdict_df)
last_stats_row = statistics_df.iloc[-1]
pos_cases = int(last_stats_row[pos_labels].sum())
neg_cases = total_cases - pos_cases

# Keep-probabilities chosen so that, in expectation, `used_cases` CTs are
# kept, half of them positive and half negative.
used_cases = 50
prob_keep_pos = used_cases / (2 * pos_cases)
prob_keep_neg = used_cases / (2 * neg_cases)

In [None]:
# Walk over the patient directories, pick one random .npz case per patient and
# keep it with a class-dependent probability so the sample is roughly balanced.
cnt=0
cts=[]
labels=[]
for patient_path in glob.iglob('/mnt/idms/PROJECTS/Lung/Tudo-Ulyssys-Unzipped/*'):
    # `iglob` yields full paths. Accept a PatientID equal to the full path
    # (the original comparison) or to the bare directory name.
    # NOTE(review): confirm which form the PatientID column actually uses.
    patient_keys=[patient_path, Path(patient_path).name]
    patient_rows=verdict_df.loc[verdict_df['PatientID'].astype(str).isin(patient_keys)]
    if patient_rows.empty:
        # The original `.iloc[0]` raised IndexError here and aborted the whole scan.
        print(f"No verdict row for {patient_path}, skipping")
        continue
    diseases=patient_rows.iloc[0]
    pos_diseases=np.array(diseases[pos_labels])
    pos_or_neg=bool(np.any(pos_diseases==1.0))
    # Clamp to 1: a class with fewer than used_cases/2 members would otherwise
    # give p > 1 and make `np.random.choice` raise ValueError.
    p_keep=min(1.0, prob_keep_pos if pos_or_neg else prob_keep_neg)
    label=1.0 if pos_or_neg else 0.0
    cases=glob.glob(f'{patient_path}/*.npz')
    if not cases:
        continue
    case_path=random.choice(cases)
    cnt+=1
    print(cnt)
    if pos_or_neg:
        print("Positive found")
    if np.random.choice([True, False],p=[p_keep,1-p_keep]):
        print(f"Case number {cnt} with name {case_path} is included")
        # The context manager closes the archive once the array has been read.
        with np.load(case_path) as npz:
            ct=npz[npz.files[0]]
        cts.append(ct)
        labels.append(label)

In [None]:
# Turn the collected per-CT labels into a NumPy array (used for stratification below).
labels = np.array(labels)

In [None]:
# Intensity range used to rescale slices to 0..255 (`x*255/max_HU`).
min_HU = 0.0
max_HU = 4095.0

# In-plane sizes in pixels; `cropped_size` is the side length fed to the CNN.
downsample_size = 384
cropped_size = 384

# Slices are drawn from a window of `middle_slice_num` slices centred on the
# middle of each CT; `used_slices` of them are sampled per CT.
middle_slice_num = 300
used_slices = 30

In [None]:
#slices=[Image.fromarray((s.pixel_array/max_HU)*255.0).convert("L").resize((downsample_size,downsample_size),resample=Image.LANCZOS) for s in slices]

In [None]:
def center_crop(im, new_width=None, new_height=None):
    """Crop the central `new_width` x `new_height` region out of an image.

    Parameters
    ----------
    im : object exposing `.size` as `(width, height)` and `.crop(box)`
        (e.g. a PIL image).
    new_width, new_height : number, optional
        Target size. A value of `None` keeps the full extent along that axis
        (the original raised TypeError for the declared `None` defaults).

    Returns
    -------
    Whatever `im.crop` returns for the centred box `(left, top, right, bottom)`.
    The box coordinates may be fractional; they are passed on unchanged.
    """
    width, height = im.size   # Get dimensions
    if new_width is None:
        new_width = width
    if new_height is None:
        new_height = height

    left = (width - new_width)/2
    top = (height - new_height)/2
    right = (width + new_width)/2
    bottom = (height + new_height)/2

    # Crop the center of the image
    return im.crop((left, top, right, bottom))

In [None]:
# Split at CT level (70 % train / 30 % test), keeping the class ratio in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    cts,
    labels,
    test_size=0.3,
    stratify=labels,
)

In [None]:
class CTLungSlice(Dataset):
    """Slice-level dataset: every item is one 2-D slice with its CT's label.

    For each CT volume (slices along axis 2) with more than `used_slices`
    slices, `used_slices` distinct slices are sampled from a window of
    `middle_slice_num` slices centred on the middle of the volume. Slices are
    rescaled from 0..`max_HU` to 0..255 and stored as `uint8`.

    Fixes relative to the original:
    * `np.random.choice` only accepts 1-D input, so slice *indices* are sampled
      instead of the 3-D stack itself;
    * the window is clamped to the volume, because a negative start index
      silently wrapped around to the end of the CT;
    * intensities are rescaled *before* the `uint8` cast; casting first
      truncated everything above 255.

    Parameters
    ----------
    X_data : iterable of 3-D arrays indexed as [row, column, slice]
    y_data : iterable of scalar labels, aligned with `X_data`
    transform : callable applied to a PIL image of the slice, optional

    Raises
    ------
    ValueError
        If no CT has enough slices.
    """

    def __init__(self, X_data, y_data, transform=None):
        self.X_data = []
        self.y_data = []
        self.len=0
        half_window=middle_slice_num//2
        for ct,lab in zip(X_data,y_data):
            slice_num=ct.shape[2]
            if slice_num<=used_slices:
                continue
            mid=slice_num//2
            # Clamp the window to the valid slice range.
            start=max(0,mid-half_window)
            stop=min(slice_num,mid+half_window)
            used_part=np.moveaxis(ct[:,:,start:stop],2,0)
            if used_part.shape[0]<used_slices:
                continue
            # Sample distinct slice indices, then gather those slices.
            picked=np.random.choice(used_part.shape[0],used_slices,replace=False)
            used_part=used_part[picked]
            # Rescale to 0..255 first, then narrow to uint8.
            used_part=np.clip(used_part*255.0/max_HU,0,255).astype(np.uint8)
            self.X_data.append(used_part)
            self.y_data+=[float(lab)]*used_slices
            self.len+=used_slices
        if not self.X_data:
            raise ValueError(f"No CT has more than {used_slices} slices; the dataset would be empty")
        self.X_data=np.concatenate(self.X_data)
        # float32 labels match the dtype of the model output.
        self.y_data=np.array(self.y_data,dtype=np.float32)
        self.transform = transform

    def __getitem__(self, index):
        x=self.X_data[index]
        y=self.y_data[index]
        if self.transform:
            # Slices are already 8-bit, so they map directly to a grayscale image.
            x = self.transform(Image.fromarray(x))
        return x, y

    def __len__ (self):
        return self.len

In [None]:
class CTLungWhole(Dataset):
    """CT-level dataset: every item is the stack of sampled slices of one CT.

    Slice selection and rescaling mirror the slice-level dataset:
    `used_slices` distinct slices are drawn from a window of
    `middle_slice_num` slices around the middle of the volume and stored as
    `uint8` after rescaling from 0..`max_HU` to 0..255.

    Fixes relative to the original:
    * labels are taken from the CTs that were actually kept; the original
      rebuilt `self.y_data` from the unfiltered input, so labels and length
      went out of sync with `X_data` whenever a CT was skipped;
    * `np.random.choice` only accepts 1-D input, so slice indices are sampled;
    * the window is clamped, because a negative start index wrapped around;
    * rescaling happens before the `uint8` cast.

    Parameters
    ----------
    X_data : iterable of 3-D arrays indexed as [row, column, slice]
    y_data : iterable of scalar labels, aligned with `X_data`
    transform : callable applied to a PIL image of each slice, optional
    """

    def __init__(self, X_data, y_data, transform=None):
        self.y_data = []
        self.X_data = []
        self.len=0
        half_window=middle_slice_num//2
        for ct,lab in zip(X_data,y_data):
            slice_num=ct.shape[2]
            if slice_num<=used_slices:
                continue
            mid=slice_num//2
            # Clamp the window to the valid slice range.
            start=max(0,mid-half_window)
            stop=min(slice_num,mid+half_window)
            used_part=np.moveaxis(ct[:,:,start:stop],2,0)
            if used_part.shape[0]<used_slices:
                continue
            # Sample distinct slice indices, then gather those slices.
            picked=np.random.choice(used_part.shape[0],used_slices,replace=False)
            used_part=used_part[picked]
            # Rescale to 0..255 first, then narrow to uint8.
            used_part=np.clip(used_part*255.0/max_HU,0,255).astype(np.uint8)
            self.X_data.append(used_part)
            self.y_data.append(float(lab))
            self.len+=1
        self.X_data=np.asarray(self.X_data,dtype=np.uint8)
        # Only the labels of the CTs that were kept.
        self.y_data=np.array(self.y_data,dtype=np.float32)
        self.transform = transform

    def __getitem__(self, index):
        y=self.y_data[index]
        if self.transform:
            # One transformed image per sampled slice.
            x=[self.transform(Image.fromarray(item)) for item in self.X_data[index]]
        else:
            x=self.X_data[index]
        return x, y

    def __len__ (self):
        return len(self.y_data)

In [None]:
# Mini-batch size of the training loader.
batch_size = 16

In [None]:
# Training uses a random crop, evaluation a deterministic centre crop; both
# end with the conversion to a tensor.
train_transform = transforms.Compose([
    transforms.RandomCrop(cropped_size),
    transforms.ToTensor(),
])
test_transform = transforms.Compose([
    transforms.CenterCrop(cropped_size),
    transforms.ToTensor(),
])

y_train_tensor = torch.FloatTensor(y_train)
y_test_tensor = torch.FloatTensor(y_test)

# Datasets are built in the same order as before (train, test, whole-CT test).
train_dataset = CTLungSlice(X_train, y_train_tensor, transform=train_transform)
test_dataset = CTLungSlice(X_test, y_test_tensor, transform=test_transform)
whole_test_dataset = CTLungWhole(X_test, y_test_tensor, transform=test_transform)

# Shuffled mini-batches for training; one item at a time, in order, for evaluation.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
whole_test_loader = DataLoader(dataset=whole_test_dataset, batch_size=1, shuffle=False)

In [None]:
# Pull one training batch and look at the shape of its image tensor.
sample_batch = next(iter(train_loader))
sample_img = sample_batch[0]
sample_img.shape

## CNN: training and validation

In [None]:
class CNN(nn.Module):
    """Two-layer convolutional classifier for single-channel square inputs.

    Layout: conv(5x5, 64) -> max-pool(2) -> ELU -> conv(5x5, 64) -> dropout
    -> max-pool(2) -> ELU -> flatten -> linear. The linear layer is sized for
    inputs of side `cropped_size` (read from module scope at construction).

    Parameters
    ----------
    num_classes : int
        Number of output units of the final linear layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.elu = nn.ELU()
        # Each stage is an unpadded 5x5 convolution (-4 pixels) followed by a
        # 2x2 pooling (halving); the network has two such stages.
        side = cropped_size
        for _ in range(2):
            side = (side - 4) // 2
        self.fc1 = nn.Linear(side * side * 64, num_classes)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs for a batch of images."""
        hidden = self.conv1(x)
        hidden = self.elu(self.max_pool(hidden))
        hidden = self.conv2_drop(self.conv2(hidden))
        hidden = self.elu(self.max_pool(hidden))
        flat = hidden.view(hidden.size(0), -1)
        return self.fc1(flat)

In [None]:
def evaluate(pred, lab):
    """Count how many predictions agree with their labels.

    A prediction counts as positive when its value is above 0 and a label
    counts as positive when it is above 0.5. Returns the number of positions
    where the two decisions coincide, as a Python number.
    """
    predicted_positive = pred > 0
    actually_positive = lab > 0.5
    n_agree = (predicted_positive == actually_positive).sum()
    return n_agree.item()

In [None]:
# Print a progress line every `log_freq` batches.
log_freq = 1

In [None]:
def base_train_step(model,device,optimizer,criterion,train_loader):
    """Train `model` for one pass over `train_loader`.

    Parameters
    ----------
    model : nn.Module taking a (N, 1, cropped_size, cropped_size) batch
    device : torch.device the batches are moved to
    optimizer : optimizer stepping `model`'s parameters
    criterion : loss called as `criterion(outputs, labels)`
    train_loader : sized iterable of `(images, labels)` batches

    Returns nothing; a progress line is printed every `log_freq` batches.
    """
    # Training must run in train mode so dropout is active, even if the model
    # was switched to eval mode by an earlier evaluation or inference cell.
    model.train()
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device).view(-1, 1, cropped_size, cropped_size)
        # Column vector of float32 targets, matching the model output.
        labels = labels.to(device).view(-1, 1).float()
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % log_freq == 0:
            batch_performance=evaluate(outputs, labels)
            # `loss.item()` logs the plain number instead of the tensor repr.
            print(f"Batch {batch_idx+1} / {len(train_loader)} | Loss = {loss.item()},  Correct = {batch_performance} / {len(labels)}, Ill = {(labels==1).sum().item()}")

In [None]:
def base_model_eval(model,device,test_loader):
    """Count correct predictions of `model` over `test_loader`.

    The model is switched to eval mode for the duration of the call (the
    original evaluated with dropout still active) and its previous mode is
    restored afterwards, so a following training step is unaffected.

    Parameters
    ----------
    model : nn.Module taking a (N, 1, cropped_size, cropped_size) batch
    device : torch.device the batches are moved to
    test_loader : sized iterable of `(images, labels)` batches

    Returns
    -------
    (correct, total) : number of correct predictions and number of samples.
    """
    total = 0
    correct = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for batch_idx, (images, labels) in enumerate(test_loader):
                images = images.to(device).view(-1, 1, cropped_size, cropped_size)
                labels = labels.to(device).view(-1, 1)
                outputs = model(images)
                # Computed once and reused for both the running total and the log line.
                batch_performance = evaluate(outputs, labels)
                total += labels.size(0)
                correct += batch_performance
                if batch_idx % log_freq == 0:
                    print(f"Batch {batch_idx+1} / {len(test_loader)} |  Correct = {batch_performance} / {len(labels)}, Ill = {(labels==1).sum().item()}")
    finally:
        # Hand the model back in the mode it arrived in.
        model.train(was_training)
    return correct,total

In [None]:
# Drop a model left over from a previous run, if there is one. On a fresh
# kernel `model_base` does not exist yet, and a bare `del` raised NameError.
try:
    del model_base
except NameError:
    pass
# Collect the released objects first, then return the cached GPU blocks; the
# original emptied the cache before the model was released.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Training hyper-parameters and the best test accuracy seen so far.
epochs = 10
learning_rate = 0.001
act_max=0.0

# Single-output network trained with a logit-based binary cross-entropy.
model_base = CNN(num_classes=1).to(device)
criterion_base = nn.BCEWithLogitsLoss()
optimizer_base = optim.SGD(model_base.parameters(), lr=learning_rate)

# One weight snapshot per epoch.
params=[]

for epoch in range(epochs):
    base_train_step(model_base,device,optimizer_base,criterion_base,train_loader)
    correct, total = base_model_eval(model_base,device,test_loader)
    # Guard against an empty test loader.
    accuracy = correct/total if total else 0.0
    print(epoch,"acc:",accuracy," raw: ",correct,total,)
    if accuracy>act_max:
        print("IMPROVED")
        act_max = accuracy
    # Store a detached CPU copy of the weights. The original appended
    # `model_base.parameters()`, a generator over the live parameters, so every
    # entry pointed at the final weights instead of that epoch's state.
    params.append({name: tensor.detach().cpu().clone() for name, tensor in model_base.state_dict().items()})

In [None]:
# Persist only the learned weights (the state dict), not the module object.
trained_weights = model_base.state_dict()
torch.save(trained_weights, "./Lung_model_1")

In [None]:
# Rebuild the architecture and restore the weights written by the save cell.
# The original read "./PytorchModels/Lung_model_1", a path nothing in this
# notebook writes. `map_location` lets a checkpoint saved on a GPU load on a
# CPU-only machine.
loaded_model = CNN(num_classes=1)
loaded_model.load_state_dict(torch.load("./Lung_model_1", map_location=device))
# Inference mode: disables dropout.
loaded_model.eval()

In [None]:
# Score one slice of a training CT with the reloaded model.
# The original indexed `X_train[2][10]` (a row of the volume, not a slice) and
# reshaped it to 256x256, which does not match the `cropped_size`-sized input
# the network's linear layer is built for. The slice is now taken along the
# slice axis and preprocessed like the evaluation data (8-bit grayscale,
# centre crop, tensor).
sample_slice = Image.fromarray(X_train[2][:, :, 10]*255/max_HU).convert("L")
with torch.no_grad():
    sample_logit = loaded_model.to(device)(test_transform(sample_slice).unsqueeze(0).to(device))
sample_logit

# AUC and ROC Curve

In [None]:
def base_model_eval_pred(model,device,test_loader):
    """Collect the raw model output for every sample in `test_loader`.

    All rows of every batch are kept; the original stored only
    `outputs[0][0]`, dropping every sample but the first when the batch size
    exceeds 1. The model runs in eval mode (dropout off) and is returned to
    its previous mode afterwards.

    Parameters
    ----------
    model : nn.Module taking a (N, 1, cropped_size, cropped_size) batch and
        returning at least one output column per sample
    device : torch.device the batches are moved to
    test_loader : iterable of `(images, labels)` batches; labels are ignored

    Returns
    -------
    1-D NumPy array with the first output column of each sample, in loader order.
    """
    pred = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, _ in test_loader:
                images = images.to(device).view(-1, 1, cropped_size, cropped_size)
                outputs = model(images).cpu().numpy()
                pred.extend(outputs[:, 0])
    finally:
        model.train(was_training)
    return np.asarray(pred)

In [None]:
# Slice-level scores of the trained model on the test slices.
pred = base_model_eval_pred(model_base,device,test_loader)
# Flatten the per-batch label tensors into one 1-D array, the documented form
# of `y_true` for the scikit-learn ROC functions, independent of batch size.
target_bool = np.concatenate([np.asarray(y).reshape(-1) for _,y in test_loader])
print(pred)
auc = roc_auc_score(target_bool, pred)
print('AUC=%.3f' % (auc))

fpr, tpr, _ = roc_curve(target_bool, pred)

pyplot.plot(fpr, tpr, marker='.', label='LungMetastasis')

pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
pyplot.legend()
pyplot.show()

### Lime

In [None]:
def batch_predict(images):
    """Classification function handed to LIME.

    Parameters
    ----------
    images : 4-D array indexed as [sample, row, column, channel]; only
        channel 0 of each sample is fed to the network.

    Returns
    -------
    NumPy array of shape (n_samples, 2) with the probabilities
    `[P(negative), P(positive)]`, i.e. `1 - sigmoid(logit)` and `sigmoid(logit)`.

    Fixes relative to the original: the model used for prediction
    (`loaded_model`) is the one put into eval mode (the original called
    `model_base.eval()`), inference runs without gradient tracking, and the
    sigmoid probabilities that were computed but discarded are what gets
    returned, because LIME expects prediction probabilities from its
    classifier function.
    """
    loaded_model.eval()
    loaded_model.to(device)
    to_tensor = transforms.ToTensor()
    batch = torch.stack([to_tensor(images[i,:,:,0]) for i in range(images.shape[0])])
    batch = batch.to(device)
    with torch.no_grad():
        logits = loaded_model(batch)
        probs = torch.sigmoid(logits)
    # Two columns so that every class has its own probability.
    return torch.cat([1 - probs, probs], dim=1).cpu().numpy()

In [None]:
# One slice of the second loaded CT, rescaled to 8-bit grayscale and centre-cropped.
slice_8bit = Image.fromarray(cts[1][:,:,310]*255/max_HU).convert("L")
img = transforms.CenterCrop(cropped_size)(slice_8bit)

# Three-channel float copy of the same image (every channel identical).
shape = tuple(list(img.size) + [3])
imgg = np.zeros(shape)
imgg[...] = np.asarray(img)[..., None]

In [None]:
# Explain the model's decision for `img` with LIME.
explainer = lime_image.LimeImageExplainer()
lime_input = np.array(img)
explanation = explainer.explain_instance(
    lime_input,
    batch_predict,      # classification function
    top_labels=1,
    hide_color=0,
    num_samples=1000,   # number of images that will be sent to classification function
)

In [None]:
# Shape of the sample training batch after conversion to a NumPy array.
sample_img_np = np.array(sample_img)
sample_img_np.shape

In [None]:
# `mask` was only assigned in a later cell, so this cell raised NameError on a
# fresh kernel. Compute it here with the same settings as that cell.
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=200, hide_rest=False)
# Distinct values present in the explanation mask.
np.unique(mask)

In [None]:
# `temp` was only assigned in a later cell, so this cell raised NameError on a
# fresh kernel. Compute it here with the same settings as that cell.
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=200, hide_rest=False)
# Show the image returned by LIME.
Image.fromarray(temp)

In [None]:
# Regions that speak for the top label (positive contributions only).
top_label = explanation.top_labels[0]
temp, mask = explanation.get_image_and_mask(
    top_label,
    positive_only=True,
    num_features=200,
    hide_rest=False,
)
# Scale the image to 0..1 and outline the selected superpixels.
img_boundry1 = mark_boundaries(temp/255.0, mask)
plt.imshow(img_boundry1)

In [None]:
# The two most influential regions for the top label, whatever their sign.
top_label = explanation.top_labels[0]
temp, mask = explanation.get_image_and_mask(
    top_label,
    positive_only=False,
    num_features=2,
    hide_rest=False,
)
# Scale the image to 0..1 and outline the selected superpixels.
img_boundry2 = mark_boundaries(temp/255.0, mask)
plt.imshow(img_boundry2)

# Evaluating on CTs

Here the validation predictions are aggregated per CT:

In [None]:
def base_model_eval_whole(model, device, test_loader):
    """Score every sampled slice of every CT in `test_loader`.

    The model runs in eval mode (the original left dropout active) and is
    returned to its previous mode afterwards.

    Parameters
    ----------
    model : nn.Module taking a (N, 1, cropped_size, cropped_size) batch
    device : torch.device the slices are moved to
    test_loader : iterable of `(images, label)` pairs, where `images` is a
        sequence of slice tensors belonging to one CT. Only the first output
        of each slice batch is kept, so a loader batch size of 1 is assumed.

    Returns
    -------
    preds : list with one 1-D array of slice scores per CT
    labels : NumPy array built from the collected per-CT labels
    """
    preds = []
    correct = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, label in test_loader:
                slice_preds=[]
                for image in images:
                    image = image.to(device).view(-1, 1, cropped_size, cropped_size)
                    outputs = model(image).cpu().numpy()
                    slice_preds.append(outputs[0][0])
                correct.append(label)
                preds.append(np.asarray(slice_preds))
    finally:
        model.train(was_training)
    return preds, np.asarray(correct)

In [None]:
# Slice scores for every test CT, together with the CT-level labels.
preds, target_bool = base_model_eval_whole(model_base, device, whole_test_loader)

# Two ways of turning slice scores into one score per CT: highest and average.
max_preds = []
mean_preds = []
for slices in preds:
    max_preds.append(np.max(slices))
    mean_preds.append(np.mean(slices))

In [None]:
# Display the CT-level labels returned by `base_model_eval_whole`.
target_bool

In [None]:
# CT-level AUC when a CT is scored by its highest slice output.
auc_max = roc_auc_score(target_bool, max_preds)
print('AUC of Max method=%.3f' % (auc_max))

print(target_bool, max_preds)
fpr, tpr, _ = roc_curve(target_bool, max_preds)

# ROC curve of the max-aggregated scores.
pyplot.plot(fpr, tpr, marker='.', label='LungMetastasis')
pyplot.title('ROC of Max method')
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
pyplot.legend()
pyplot.show()

In [None]:
# CT-level AUC when a CT is scored by the average of its slice outputs.
auc_mean = roc_auc_score(target_bool, mean_preds)
print('AUC of Mean method=%.3f' % (auc_mean))

print(target_bool, mean_preds)
fpr, tpr, _ = roc_curve(target_bool, mean_preds)

# ROC curve of the mean-aggregated scores.
pyplot.plot(fpr, tpr, marker='.', label='LungMetastasis')
pyplot.title('ROC of Mean method')
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
pyplot.legend()
pyplot.show()