Installing required libraries:

In [157]:
%pip install boto3 nibabel numpy matplotlib scikit-image opencv-python

Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.3/38.8 MB ? eta -:--:--
   --- ------------------------------------ 3.1/38.8 MB 12.3 MB/s eta 0:00:03
   -------- ------------------------------- 8.7/38.8 MB 18.5 MB/s eta 0:00:02
   --------------- ------------------------ 14.7/38.8 MB 22.0 MB/s eta 0:00:02
   ---------------------- ----------------- 21.8/38.8 MB 24.6 MB/s eta 0:00:01
   ---------------------------- ----------- 28.0/38.8 MB 25.8 MB/s eta 0:00:01
   ----------------------------------- ---- 34.3/38.8 MB 26.3 MB/s eta 0:00:01
   ---------------------------------------- 38.8/38.8 MB 25.7 MB/s eta 0:00:00
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84
Note: you may need to restart the kernel to

Importing those libraries:

In [2]:
# Importing nessecary libaries:

import boto3
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import io
import tempfile
import os

1. Rendering Brain Slices
2. Data Preprocessing on the Brain Slices
3. Saving processed Brain Slices as '.png' Images to AWS S3 Buckets

In [3]:
# Setting up the data pipeline to access the 5 brains in the AWS S3 Bucket folder path:

s3 = boto3.resource('s3')
bucket_name = 'chemocraft-data'
folder_path = 'MICCAI_BraTS2020_TrainingData/'
bucket = s3.Bucket(bucket_name)

# Setting Crop values for Data Preprocessing:

crop_left, crop_right = 20, 10
crop_top, crop_bottom = 30, 30

def render_nii_from_s3(filename): # Function to display the middle slice of each brain scan type
    print(f"Fetching file: {filename}")

    obj = bucket.Object(folder_path + filename)
    file_stream = io.BytesIO(obj.get()['Body'].read())

    # Using temp files for efficent computing:

    with tempfile.NamedTemporaryFile(suffix='.nii', delete=False) as temp_file:  # Disable auto-delete
        temp_file.write(file_stream.getvalue())
        temp_file.flush()

        temp_file_path = temp_file.name
        print(f"Temporary file created: {temp_file_path}")

    try:
        img = nib.load(temp_file_path)
        data = img.get_fdata() # Storing brain into data variable

        print(f"Data shape for {filename}: {data.shape}") # Displaying shape of each brain

        if data.size == 0: # if data is nonexistent
            print(f"No data found in {filename}")
            return

        slice_idx = data.shape[2] // 2 # Getting index of middle slice to display it

        plt.figure(figsize=(3, 3)) # Displaying in 3x3 square
        plt.imshow(data[:, :, slice_idx], cmap='gray') # Color is set to grayscale
        plt.title(f'{filename} - Slice {slice_idx}') # Creating a title for the image
        plt.axis('off')  # Hide axes for cleaner display
        plt.show() # Finally showing the image

    except Exception as e:
        print(f"Error loading file {filename}: {e}") # Reports problems with getting the file
    finally:
        try:
            os.remove(temp_file_path)  # We do not want to save files locally, so we now delete the temp files
            print(f"Deleted temporary file: {temp_file_path}")
        except OSError as cleanup_error:
            print(f"Error deleting temp file: {cleanup_error}")

def save_png_from_nii(filename): # Function which will save .png grayscale brain slices to AWS S3 Buckets
    print(f"Fetching file: {filename}")
    obj = bucket.Object(folder_path + filename)
    file_stream = io.BytesIO(obj.get()['Body'].read())

    with tempfile.NamedTemporaryFile(suffix='.nii', delete=False) as temp_file:  # Disable auto-delete
        temp_file.write(file_stream.getvalue())
        temp_file.flush()

        temp_file_path = temp_file.name
        print(f"Temporary file created: {temp_file_path}")
    
        try:
            img = nib.load(temp_file_path)
            data = img.get_fdata()
            
            start_y = crop_top
            end_y = data.shape[0] - crop_bottom
            start_x = crop_left
            end_x = data.shape[1] - crop_right

            if data.size == 0:
                print(f"No data found in {filename}")
                return
            
            for slice_idx in range(data.shape[2]): # For each slice of each brain
                slice_2d = data[:, :, slice_idx]
                cropped_slice = slice_2d[start_y:end_y, start_x:end_x]

                filename = filename.removesuffix(".nii") # Removes the .nii part

                # Folder for each brain inside the Brain_Slices:

                brain_number = filename.split('_')[-2]
                scan_type = filename.split('_')[-1]

                slice_path = f"brain_slices/{brain_number}/{scan_type}"
                print(f"Saving file in directory: {slice_path}") 

                png_filename = f"{slice_path}/{slice_idx}.png" 
                
                with tempfile.NamedTemporaryFile(suffix= '.png', delete=False) as temp_png: # This part creates a temp png .file used to save the grayscale brain slice
                    mpimg.imsave(temp_png.name, cropped_slice, cmap='gray')
                    temp_png.flush()
                    temp_png.seek(0)
                    temp_png_name = temp_png.name
                try: 
                    s3.Bucket(bucket_name).upload_file(temp_png_name, f"tanmay/{png_filename}")
                    os.remove(temp_png_name)
                except Exception as e:
                    print(f"Error saving file: {png_filename}, {e}")
                    
        except Exception as e:
            print(f"Error saving file {filename}: {e}")

found_files = False

i=0 # Counter for the number of brains

for obj in bucket.objects.filter(Prefix=folder_path):
    if obj.key.endswith('.nii'):
        found_files = True
        print(obj.key)
        filename = obj.key.split('/')[-2] + '/' + obj.key.split('/')[-1]  # Get the filename 
        print("     ")
        # render_nii_from_s3(filename)
        save_png_from_nii(filename)
        i+=1

print(f"There are {i} brains. ('.nii' files)")
if not found_files:
    print(f"No .nii files found in the folder {folder_path}")

MICCAI_BraTS2020_TrainingData/BraTS20_Training_002/BraTS20_Training_002_flair.nii
     
Fetching file: BraTS20_Training_002/BraTS20_Training_002_flair.nii
Temporary file created: C:\Users\User\AppData\Local\Temp\tmpawfdkyl6.nii
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slices/002/flair
Saving file in directory: brain_slice

Defining the GAN Architecture
1. The Generator
2. The Discriminator
3. Connecting the Generator & Discriminator
4. Compiling the Model

The Generator:

In [4]:
# Generator's output shape must be the same shape as the cropped real image: (210, 180, 1)

dim = 100

from tensorflow.keras import layers, models

def build_generator(latent_dim, output_shape=(210, 180, 1)):
    model = models.Sequential(name="ChemoCraft Generator")
    print("Building Generator")

    model.add(layers.Input(shape=(latent_dim,)))
    model.add(layers.Dense(128, activation="relu"))
    model.add(layers.Dense(256 * 32, activation="relu"))
    model.add(layers.Reshape(target_shape=(16, 16, 32)))
    model.add(layers.Conv2DTranspose(filters=32, kernel_size=5, strides=6, padding="same", activation="relu"))
    model.add(layers.Conv2DTranspose(filters=8, kernel_size=3, strides=5, padding="same", activation="relu"))
    
    prev_out = model.layers[-1].output.shape

    model.add(layers.Conv2D(1, kernel_size=(prev_out[1]-output_shape[0]+1, prev_out[2]-output_shape[1]+1), strides=1, padding="valid", activation="tanh"))

    return model

chemocraft_generator = build_generator(latent_dim=dim)
chemocraft_generator.summary()

Building Generator


The Discriminator:

In [5]:
# Discriminator's input shape should be the same shape as the generator's output: (210, 180, 1)

shape = (210, 180, 1)

def build_discriminator(input_shape=(210, 180, 1)):

    model = models.Sequential(name="ChemoCraft Discriminator")
    print("Building Discriminator Model")

    model.add(layers.Input(shape=input_shape))
    model.add(layers.Conv2D(filters=16, kernel_size=9, strides=5, padding="same", activation="relu")) 
    model.add(layers.Conv2D(filters=32, kernel_size=5, strides=4, padding="same", activation="relu"))
    model.add(layers.Conv2D(filters=128, kernel_size=3, strides=3, padding="same", activation="relu"))
    model.add(layers.Flatten())
    model.add(layers.Dense(1, activation="sigmoid", name="output"))

    return model
   
chemocraft_discriminator = build_discriminator(shape)
chemocraft_discriminator.summary()

Building Discriminator Model


In [6]:
def compile_gan(generator, discriminator, latent_dim):
    discriminator.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    z = layers.Input(shape=(latent_dim,))
    img = generator(z)
    discriminator.trainable = False
    validity = discriminator(img)
    gan = models.Model(z, validity)
    gan.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return gan

chemocraft_gan = compile_gan(chemocraft_generator, chemocraft_discriminator,latent_dim=dim)
chemocraft_gan.summary()

Connecting Generator & Discriminator through the GAN:

In [38]:
import tensorflow as tf

def train_gan(generator, latent_dim, discriminator, gan, train_slices, epochs, batch_size):
    for epoch in range(epochs):
        for _ in range(len(train_slices) // batch_size):
            # Select random batch of real images
            idx = np.random.randint(0, len(train_slices), batch_size)
            real_slices = np.array([train_slices[i] for i in idx])

            # Generate fake images
            noise = np.random.normal(0, 1, (batch_size, latent_dim))
            fake_slices = generator.predict(noise)

            # Train the discriminator
            d_loss_real = discriminator.train_on_batch(real_slices, np.ones((batch_size, 1)))
            d_loss_fake = discriminator.train_on_batch(fake_slices, np.zeros((batch_size, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Train the generator
            noise = np.random.normal(0, 1, (batch_size, latent_dim))
            g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

        print(f"Epoch {epoch + 1}/{epochs}, D Loss: {d_loss[0]}, G Loss: {g_loss[0]}")

Setting up Data Pipeline for Training:

In [None]:
from keras.preprocessing.image import img_to_array, load_img

s3 = boto3.resource('s3')
bucket_name = 'chemocraft-data'
folder_path = 'tanmay/brain_slices/'
test_path = f'{folder_path}320/flair/'
bucket = s3.Bucket(bucket_name)

images = []

def load_images(bucket_name, folder_prefix):
    print(f"Loading images from S3 Bucket: {bucket_name}/{folder_prefix}")
    images = []

    for obj in bucket.objects.filter(Prefix=folder_prefix):
        if obj.key.endswith('.png'):
            try:
                file_stream = io.BytesIO(obj.get()['Body'].read())
                image = load_img(file_stream, target_size=(210, 180), color_mode='grayscale')
                image = np.array(image) / 255.0  # Normalize to [0, 1]
                images.append(image)
            
            except Exception as e:
                print(f"Error loading image {obj.key}: {e}")

    return np.array(images)

image_array = load_images(bucket_name=bucket, folder_prefix=folder_path)

print(image_array.shape)

Loading images from S3 Bucket: s3.Bucket(name='chemocraft-data')/tanmay/brain_slices/320/flair/
(155, 210, 180)
