In [1]:
%%capture
!pip3 install tensorflow
!pip3 install nibabel
!pip3 install scipy
!pip3 install -r requirements.txt

In [2]:
import os
import time
import zipfile
import numpy as np
import nibabel as nib
from tensorflow import keras
from scipy import ndimage

In [3]:
# Fetch (if needed) and unpack the two scan archives. The listing cell further
# down reads CTData/CT-0 as the normal scans and CTData/CT-23 as the abnormal ones.
DATA_DIR = "./CTData"
RELEASE_URL = "https://github.com/hasibzunair/3D-image-classification-tutorial/releases/download/v0.2/"

os.makedirs(DATA_DIR, exist_ok=True)

for archive_stem in ("CT-0", "CT-23"):
    # Check each archive on its own: a single guard on CTData would skip the
    # extraction forever after a first run that failed half-way.
    # Assumes each zip unpacks into a folder named after itself (the listing
    # cell relies on the same layout).
    if os.path.isdir(os.path.join(DATA_DIR, archive_stem)):
        continue

    archive_path = os.path.join(os.getcwd(), archive_stem + ".zip")
    if not os.path.exists(archive_path):
        # An absolute file name makes get_file store the download at that path.
        keras.utils.get_file(archive_path, RELEASE_URL + archive_stem + ".zip")

    with zipfile.ZipFile(archive_path, "r") as z_fp:
        z_fp.extractall(DATA_DIR)

In [4]:
def read_nifti_file(filepath):
    """Open the image at ``filepath`` with nibabel and return its data array.

    The returned value is whatever ``get_fdata()`` yields for the loaded image.
    """
    nifti_image = nib.load(filepath)
    return nifti_image.get_fdata()

def normalize(volume, lower=-1000, upper=400):
    """Clip ``volume`` to ``[lower, upper]`` and rescale it linearly to ``[0, 1]``.

    Args:
        volume: Array-like of intensities. It is not modified.
        lower: Values below this bound are raised to it (maps to 0.0).
        upper: Values above this bound are lowered to it (maps to 1.0).

    Returns:
        A new ``float32`` array with the same shape as ``volume``.
    """
    # np.clip returns a fresh array, so the caller's data is left untouched
    # (masked assignment on `volume` itself would overwrite the input).
    clipped = np.clip(volume, lower, upper)
    scaled = (clipped - lower) / (upper - lower)
    return scaled.astype("float32")

def resize_volume(img, desired_depth=64, desired_width=128, desired_height=128):
    """Rotate a 3-D volume by 90 degrees and resample it to a target shape.

    Args:
        img: 3-D array; axis 0 is treated as width, axis 1 as height and the
            last axis as depth.
        desired_depth: Target size of the last axis.
        desired_width: Target size of axis 0.
        desired_height: Target size of axis 1.

    Returns:
        Array of shape ``(desired_width, desired_height, desired_depth)``.
    """
    current_width, current_height = img.shape[0], img.shape[1]
    current_depth = img.shape[-1]

    # Per-axis scale factor = target size / current size.
    zoom_factors = (
        desired_width / current_width,
        desired_height / current_height,
        desired_depth / current_depth,
    )

    # Rotate first (shape is kept because reshape=False), then resample all
    # three axes with linear interpolation.
    img = ndimage.rotate(img, 90, reshape=False)
    img = ndimage.zoom(img, zoom_factors, order=1)
    return img


def process_scan(path):
    """Load the scan at ``path``, then normalize and resize it.

    Chains ``read_nifti_file`` -> ``normalize`` -> ``resize_volume`` and
    returns the resized volume.
    """
    return resize_volume(normalize(read_nifti_file(path)))

In [5]:
def list_scan_paths(class_dir):
    """Return the absolute path of every entry in ``CTData/<class_dir>``, sorted by name."""
    folder = os.path.join(os.getcwd(), "CTData", class_dir)
    # os.listdir yields entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split) reproducible across runs.
    return [os.path.join(folder, entry) for entry in sorted(os.listdir(folder))]


normal_scan_paths = list_scan_paths("CT-0")
abnormal_scan_paths = list_scan_paths("CT-23")

print("CT scans with normal lung tissue: " + str(len(normal_scan_paths)))
print("CT scans with abnormal lung tissue: " + str(len(abnormal_scan_paths)))

CT scans with normal lung tissue: 100
CT scans with abnormal lung tissue: 100


In [6]:
processTime = time.time()

# Each scan is read, normalized and resized; stacking gives one array per class.
abnormal_scans = np.array([process_scan(path) for path in abnormal_scan_paths])
normal_scans = np.array([process_scan(path) for path in normal_scan_paths])

# Class indices: 1 = abnormal, 0 = normal. The dtype is pinned to int64 because
# NumPy's default integer width is platform-dependent, while the labels are
# later fed to nn.CrossEntropyLoss as class-index targets.
abnormal_labels = np.ones(len(abnormal_scans), dtype=np.int64)
normal_labels = np.zeros(len(normal_scans), dtype=np.int64)

print("Pre-processing took {} seconds.".format(round(time.time() - processTime, 2)))

Pre-processing took 531.35 seconds.


In [7]:
# Both datasets must contain the same number of samples.
def split_datasets(split, dataset1, dataset2, label1, label2, seed=None):
    """Shuffle two equally sized datasets and split each into train/validation.

    The same random permutation is applied to both datasets. The first
    ``int(n * split)`` shuffled samples of each go to the training set and the
    rest to the validation set, so both sets hold the same share of each input.

    Args:
        split: Fraction of every dataset used for training (e.g. ``0.7``).
        dataset1, dataset2: Arrays indexed by sample along axis 0.
        label1, label2: 1-D label arrays matching ``dataset1`` / ``dataset2``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global NumPy random state is used.

    Returns:
        ``(x_train, y_train, x_val, y_val)``; within each array the samples of
        ``dataset1`` come first, followed by those of ``dataset2``.

    Raises:
        ValueError: If the four inputs do not all have the same length.
    """
    n_samples = len(label1)
    if not (len(dataset1) == len(dataset2) == len(label2) == n_samples):
        raise ValueError(
            "split_datasets expects datasets and labels of equal length, got "
            "{}, {}, {} and {}".format(
                len(dataset1), len(dataset2), len(label1), len(label2)))

    rng = np.random if seed is None else np.random.default_rng(seed)
    permutation = rng.permutation(n_samples)
    n_train = int(n_samples * split)

    # Indexing only axis 0 works for any sample dimensionality.
    dataset1, label1 = dataset1[permutation], label1[permutation]
    dataset2, label2 = dataset2[permutation], label2[permutation]

    x_train = np.concatenate((dataset1[:n_train], dataset2[:n_train]), axis=0)
    y_train = np.concatenate((label1[:n_train], label2[:n_train]), axis=0)
    x_val = np.concatenate((dataset1[n_train:], dataset2[n_train:]), axis=0)
    y_val = np.concatenate((label1[n_train:], label2[n_train:]), axis=0)
    return x_train, y_train, x_val, y_val

In [8]:
# 70 % of each class is used for training, the remainder for validation.
x_train, y_train, x_val, y_val = split_datasets(
    0.7, normal_scans, abnormal_scans, normal_labels, abnormal_labels
)

In [9]:
from torch.utils.data import DataLoader

BATCH_SIZE = 3


def to_channel_first_samples(volumes, labels):
    """Pair every volume with its label in the layout fed to the network.

    Each volume has its last axis moved to the front and a leading axis of
    size 1 added, i.e. ``(a, b, c)`` becomes ``(1, c, a, b)``.
    """
    return [
        (np.expand_dims(volume.transpose(2, 0, 1), axis=0), label)
        for volume, label in zip(volumes, labels)
    ]


trainData = to_channel_first_samples(x_train, y_train)
valData = to_channel_first_samples(x_val, y_val)

trainDataLoader = DataLoader(trainData, shuffle=True, batch_size=BATCH_SIZE)
valDataLoader = DataLoader(valData, batch_size=BATCH_SIZE)

# len(DataLoader) is the real number of batches per epoch. The formula
# `len(dataset) // batch_size + 1` counts one batch too many whenever the
# dataset size is an exact multiple of the batch size.
trainSteps = len(trainDataLoader)
valSteps = len(valDataLoader)

In [10]:
from torch.optim import Adam
from torch import nn
import generate_model
import torch

In [11]:
def train_model(epoch, model, device, lossFn, optimizer, history,
                train_loader=None, val_loader=None):
    """Train ``model`` for ``epoch`` epochs, validating after every epoch.

    Args:
        epoch: Number of epochs to run.
        model: Network to optimize. Batches are moved to ``device`` before the
            forward pass. NOTE(review): the model itself is not moved here, so
            it is presumably expected to live on ``device`` already.
        device: Device every batch is transferred to.
        lossFn: Callable ``lossFn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer bound to the parameters of ``model``.
        history: Dict that receives the per-epoch metrics under the keys
            ``train_loss``, ``train_acc``, ``val_loss`` and ``val_acc``
            (any existing entries for these keys are reset).
        train_loader: Loader of training batches; defaults to the
            notebook-level ``trainDataLoader``.
        val_loader: Loader of validation batches; defaults to the
            notebook-level ``valDataLoader``.

    Returns:
        The ``history`` dict that was passed in, for convenience.
    """
    # Fall back to the notebook globals so existing six-argument calls still work.
    if train_loader is None:
        train_loader = trainDataLoader
    if val_loader is None:
        val_loader = valDataLoader

    print("[INFO] training the network...")
    startTime = time.time()
    for key in ("train_loss", "train_acc", "val_loss", "val_acc"):
        history[key] = []

    # Batches per epoch come straight from the loaders; max() avoids a
    # division by zero if a loader happens to be empty.
    train_steps = max(len(train_loader), 1)
    val_steps = max(len(val_loader), 1)
    n_train = max(len(train_loader.dataset), 1)
    n_val = max(len(val_loader.dataset), 1)

    for e in range(epoch):
        model.train()
        totalTrainLoss = 0.0
        trainCorrect = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            pred = model(x)
            loss = lossFn(pred, y)
            loss.backward()
            optimizer.step()
            # Accumulate a plain float: summing the loss tensors themselves
            # keeps autograd history attached to the running total.
            totalTrainLoss += loss.item()
            trainCorrect += (pred.argmax(1) == y).sum().item()

        model.eval()
        totalValLoss = 0.0
        valCorrect = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                pred = model(x)
                totalValLoss += lossFn(pred, y).item()
                valCorrect += (pred.argmax(1) == y).sum().item()

        avgTrainLoss = totalTrainLoss / train_steps
        avgValLoss = totalValLoss / val_steps
        trainAcc = trainCorrect / n_train
        valAcc = valCorrect / n_val
        history["train_loss"].append(avgTrainLoss)
        history["train_acc"].append(trainAcc)
        history["val_loss"].append(avgValLoss)
        history["val_acc"].append(valAcc)

        print("[INFO] EPOCH: {}/{}".format(e + 1, epoch))
        print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(
            avgTrainLoss, trainAcc))
        print("Val loss: {:.6f}, Val accuracy: {:.4f}\n".format(
            avgValLoss, valAcc))

    print("[INFO] total time taken to train the model: {}s".format(
        round(time.time() - startTime, 2)))
    return history

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# train_model sends every batch to `device`, so the network must live there as
# well; it is moved before the optimizer is built from its parameters.
# (Assumes generate_model.main returns an nn.Module - it is used as one below.)
model = generate_model.main('EfficientNet', 10, 2, 1, None).to(device)
lossFn = nn.CrossEntropyLoss()
# NOTE(review): lr = 0.03 is 30x Adam's default of 1e-3, and the recorded run
# shows the validation loss blowing up (up to ~1e12) while validation accuracy
# stays at 0.5 - consider a smaller learning rate.
optimizer = Adam(model.parameters(), lr = 0.03)
history = {}

train_model(50, model, device, lossFn, optimizer, history)

[INFO] training the network...
[INFO] EPOCH: 1/50
Train loss: 4.860854, Train accuracy: 0.4929
Val loss: 1020349120512.000000, Val accuracy: 0.5000

[INFO] EPOCH: 2/50
Train loss: 3.561986, Train accuracy: 0.4857
Val loss: 4928930.500000, Val accuracy: 0.5000

[INFO] EPOCH: 3/50
Train loss: 3.105308, Train accuracy: 0.4857
Val loss: 184170545152.000000, Val accuracy: 0.5000

[INFO] EPOCH: 4/50
Train loss: 2.766868, Train accuracy: 0.4857
Val loss: 3890922389504.000000, Val accuracy: 0.5000

[INFO] EPOCH: 5/50
Train loss: 2.514093, Train accuracy: 0.4929
Val loss: 173850496.000000, Val accuracy: 0.5000

[INFO] EPOCH: 6/50
Train loss: 2.163941, Train accuracy: 0.5357
Val loss: 199002.031250, Val accuracy: 0.5000

[INFO] EPOCH: 7/50
Train loss: 1.917966, Train accuracy: 0.4857
Val loss: 36952.820312, Val accuracy: 0.5000

[INFO] EPOCH: 8/50
Train loss: 1.562223, Train accuracy: 0.5429
Val loss: 935.209717, Val accuracy: 0.5000

[INFO] EPOCH: 9/50
Train loss: 1.605515, Train accuracy: 0.48

In [13]:
# !jupyter nbconvert --to script ct.ipynb