In [None]:
!pip install boto3 numpy nibabel matplotlib tensorflow s3fs

In [None]:
import boto3
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import io
import os
import tempfile
import asyncio

s3 = boto3.resource('s3')
bucket_name = 'chemocraft-data'
folder_path = 'MICCAI_BraTS2020_TrainingData/'
png_path = 'Theo_PNG/'
bucket = s3.Bucket(bucket_name)

crop_left, crop_right = 20, 10
crop_top, crop_bottom = 30, 30

async def upload_processed_nii(obj):

    filename = obj.key.split('/')[-1]
    print(obj.key)
    file_stream = io.BytesIO(obj.get()['Body'].read())
    
    with tempfile.NamedTemporaryFile(suffix='.nii', delete=True) as temp_file:
        temp_file.write(file_stream.getvalue())
        temp_file.flush()  

        try:
            img = nib.load(temp_file.name)
            data = img.get_fdata()

            start_y = crop_top
            end_y = data.shape[0] - crop_bottom
            start_x = crop_left
            end_x = data.shape[1] - crop_right
            
            if data.size == 0:
                print(f"No data found in {filename}")
                return

            async def up_slice(slice_idx):
                slice_2d = data[:, :, slice_idx]
                cropped_slice = slice_2d[start_y:end_y, start_x:end_x]
                
                buf = io.BytesIO()
                mpimg.imsave(buf, cropped_slice, cmap='gray', format='png')
                buf.seek(0)

                try:
                
                    s3.Object(
                        bucket_name, 
                        f'{png_path}{filename.split('.')[0]}/{slice_idx}.png'
                    ).put(
                        Body=buf,
                        ContentType='image/png'
                    )
                
                except Exception as e:
                    print(f"Error uploading file {filename}: {e}")

            for i in range(data.shape[2]):
                await up_slice(i)

            
        except Exception as e:
            print(f"Error loading file {filename}: {e}")

found_files = False
processed = set([brain.key.split('/')[-2] for brain in bucket.objects.filter(Prefix=png_path)])

for obj in bucket.objects.filter(Prefix=folder_path):
    if obj.key.endswith('.nii'):
        found_files = True
        if obj.key.split('/')[-1].split('.')[0] not in processed or obj.key == 'MICCAI_BraTS2020_TrainingData/BraTS20_Training_058/BraTS20_Training_058_t1.nii':
            await upload_processed_nii(obj)

if not found_files:
    print(f"No .nii files found in the folder {folder_path}")


In [65]:
import s3fs

bucket_name = 'chemocraft-data'
png_path = 'Theo_PNG/'

# * new library s3fs to more easily navigate dataset
fs = s3fs.S3FileSystem(anon=False)

# * want to rename 1.png -> 001.png to keep in order
# * wasted time trying to make async work
for path, _, files in fs.walk(f's3://{bucket_name}/{png_path}'):
    for file in files:
        new_file = f"{int(file.split('.')[0]):03}.png"
        
        old_path = f"{path}/{file}"
        new_path = f"{path}/{new_file}"
            
        if old_path != new_path:
            print(f"Renaming {old_path} to {new_path}")
            fs.mv(old_path, new_path)

Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/41.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/041.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/42.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/042.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/43.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/043.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/44.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/044.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/45.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/045.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/46.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/046.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/47.png to chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/047.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_153_t1ce/48.png to chemocraft-data/Theo_PNG/Br



Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/0.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/000.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/1.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/001.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/10.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/010.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/11.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/011.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/12.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/012.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/13.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/013.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/14.png to chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/014.png
Renaming chemocraft-data/Theo_PNG/BraTS20_Training_177_seg/15.png to chemocraft-data/Theo_PNG/BraTS20_Training_17

KeyboardInterrupt: 

In [3]:
import io
import random
import numpy as np
import s3fs
from PIL import Image


# * want to create iterator that returns a list of parsed slices from the next folder
# * wasted time trying to animate brain to make sure loaded correctly, but after manually looking at slices it looks right
class S3BrainIterator:
    def __init__(self, bucket_name, prefix):
        self.fs = s3fs.S3FileSystem(anon=False)
        self.bucket_name = bucket_name
        self.prefix = prefix

        self.brain_idx = 0
        self.brain_paths = [] 
        for path in self.fs.ls(f's3://{self.bucket_name}/{self.prefix}'):
            self.brain_paths.append(path+'/')
        random.shuffle(self.brain_paths)

    def __iter__(self):
        self.brain_idx = 0
        random.shuffle(self.brain_paths) # shuffle the dataset after each epoch
        return self

    def __next__(self):
        if self.brain_idx >= len(self.brain_paths):
            raise StopIteration
        
        brain_path = self.brain_paths[self.brain_idx]
        self.brain_idx += 1

        file_paths = self.fs.glob(f'{brain_path}*.png')

        slices = []
        for file_path in file_paths:
            with self.fs.open(file_path, 'rb') as f:
                img = Image.open(io.BytesIO(f.read())).convert('L')
                img_array = np.array(img)
                slices.append(img_array)

        return np.array(slices).astype(np.float32)

    def __len__(self):
        return len(self.brain_paths)

In [5]:
from tensorflow.keras import layers, models

def build_generator(latent_dim):
       model = models.Sequential()
       model.add(layers.Dense(155 * 180 * 210, activation="relu", input_dim=latent_dim))
       model.add(layers.Reshape((155, 180, 210, 1)))
       model.add(layers.Conv3DTranspose(155, kernel_size=4, strides=2, padding="same", activation="relu"))
       model.add(layers.Conv3DTranspose(180, kernel_size=4, strides=2, padding="same", activation="relu"))
       model.add(layers.Conv3D(1, kernel_size=3, strides=1, padding="same", activation="tanh"))
       return model

def build_discriminator(img_shape):
       model = models.Sequential()
       model.add(layers.Conv3D(180, kernel_size=4, strides=2, padding="same", input_shape=img_shape, activation="relu"))
       model.add(layers.Conv3D(155, kernel_size=4, strides=2, padding="same", activation="relu"))
       model.add(layers.Flatten())
       model.add(layers.Dense(1, activation="sigmoid"))
       return model

In [7]:
import tensorflow as tf

bucket_name = 'chemocraft-data'
png_path = 'Theo_PNG/'
bi = S3BrainIterator(bucket_name, png_path)
    
def compile_gan(generator, discriminator, latent_dim):
       discriminator.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
       z = layers.Input(shape=(latent_dim,))
       img = generator(z)
       discriminator.trainable = False
       validity = discriminator(img)
       gan = models.Model(z, validity)
       gan.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
       return gan

# Modify the train function to use the iterator (bi)
def train_gan(generator, discriminator, gan, bi, epochs, latent_dim):
    for epoch in range(epochs):
        for i in range(len(bi)):
            print(f"Training on brain {i}")
            # Get a single "batch" (one brain scan with all slices)
            real_slices = next(bi)

            # Generate fake brain scan
            noise = np.random.normal(0, 1, (1, latent_dim))
            fake_slices = generator.predict(noise)

            # Train the discriminator on real and fake
            d_loss_real = discriminator.train_on_batch(real_slices, np.ones((1, 1)))
            d_loss_fake = discriminator.train_on_batch(fake_slices, np.zeros((1, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Train the generator
            noise = np.random.normal(0, 1, (1, latent_dim))
            g_loss = gan.train_on_batch(noise, np.ones((1, 1)))

        print(f"Epoch {epoch + 1}/{epochs}, D Loss: {d_loss[0]}, G Loss: {g_loss}")

latent_dim = 155
img_shape = (155, 180, 210, 1)
generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
train_gan(
    generator,
    discriminator,
    compile_gan(generator, discriminator, latent_dim),
    bi,
    1,
    latent_dim
)
    

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "functional_15" is incompatible with the layer: expected shape=(None, 155, 180, 210, 1), found shape=(None, 620, 720, 840)[0m

Arguments received by Sequential.call():
  • args=('<KerasTensor shape=(None, 620, 720, 840, 1), dtype=float32, sparse=False, name=keras_tensor_50>',)
  • kwargs={'mask': 'None'}

In [9]:
from tensorflow.keras import layers, models
import numpy as np

def build_generator(latent_dim):
    # Input layer for the latent vector
    input_layer = layers.Input(shape=(latent_dim,))

    # Dense layer to project and reshape into a small 3D volume
    x = layers.Dense(64 * 20 * 24 * 16, activation="relu")(input_layer)
    x = layers.Reshape((20, 24, 16, 64))(x)  # Initial shape smaller than target, so we can upsample

    # Transpose convolutions to upsample to (40, 48, 32)
    x = layers.Conv3DTranspose(64, kernel_size=4, strides=2, padding="same", activation="relu")(x)
    # Upsample further to (80, 96, 64)
    x = layers.Conv3DTranspose(32, kernel_size=4, strides=2, padding="same", activation="relu")(x)
    # Final upsample to match the target shape (155, 180, 210, 1)
    x = layers.Conv3DTranspose(1, kernel_size=(5, 6, 5), strides=(2, 2, 2), padding="same", activation="tanh")(x)

    model = models.Model(input_layer, x)
    return model

# Discriminator with specified input shape for brain scans
def build_discriminator(img_shape):
    input_layer = layers.Input(shape=img_shape)
    
    x = layers.Conv3D(32, kernel_size=4, strides=2, padding="same", activation="relu")(input_layer)
    x = layers.Conv3D(64, kernel_size=4, strides=2, padding="same", activation="relu")(x)
    x = layers.Conv3D(128, kernel_size=4, strides=2, padding="same", activation="relu")(x)
    
    x = layers.Flatten()(x)
    x = layers.Dense(1, activation="sigmoid")(x)  # Output real/fake classification
    
    model = models.Model(input_layer, x)
    return model

# Define and compile the GAN model
def compile_gan(generator, discriminator, latent_dim):
    discriminator.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    
    z = layers.Input(shape=(latent_dim,))
    img = generator(z)
    discriminator.trainable = False  # Freeze discriminator during generator training
    validity = discriminator(img)
    
    gan = models.Model(z, validity)
    gan.compile(optimizer="adam", loss="binary_crossentropy")
    return gan

# Adjust train function to ensure consistent shape usage
def train_gan(generator, discriminator, gan, iterator, epochs, latent_dim):
    for epoch in range(epochs):
        for i in range(len(iterator)):
            print(f"Training on brain {i}")
            
            # Fetch a real batch (brain scan with all slices)
            real_slices = next(iterator)
            
            # Check shape consistency
            if real_slices.shape != (1, 155, 180, 210, 1):
                print(f"Shape mismatch: {real_slices.shape}")
                continue  # Skip to avoid shape mismatches
            
            # Generate fake brain scan
            noise = np.random.normal(0, 1, (1, latent_dim))
            fake_slices = generator.predict(noise)

            # Train discriminator on real and fake data
            d_loss_real = discriminator.train_on_batch(real_slices, np.ones((1, 1)))
            d_loss_fake = discriminator.train_on_batch(fake_slices, np.zeros((1, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Train generator
            noise = np.random.normal(0, 1, (1, latent_dim))
            g_loss = gan.train_on_batch(noise, np.ones((1, 1)))

        print(f"Epoch {epoch + 1}/{epochs}, D Loss: {d_loss[0]}, G Loss: {g_loss}")

bucket_name = 'chemocraft-data'
png_path = 'Theo_PNG/'
bi = S3BrainIterator(bucket_name, png_path)

latent_dim = 155
img_shape = (155, 180, 210, 1)
generator = build_generator(latent_dim)
discriminator = build_discriminator(img_shape)
train_gan(
    generator,
    discriminator,
    compile_gan(generator, discriminator, latent_dim),
    bi,
    1,
    latent_dim
)

ValueError: Input 0 of layer "functional_19" is incompatible with the layer: expected shape=(None, 155, 180, 210, 1), found shape=(None, 160, 192, 128)

In [None]:
from skimage.metrics import structural_similarity as ssim

def generate_3d_image(generator, latent_dim, num_slices=150):
     noise = np.random.normal(0, 1, (num_slices, latent_dim))
     generated_slices = generator.predict(noise)
     generated_3d_image = np.stack(generated_slices, axis=0)  # Shape: (150, 250, 250)
     return generated_3d_image

def evaluate_3d_image(true_image, generated_image):
       for i in range(true_image.shape[0]):  # For each slice
           slice_ssim = ssim(true_image[i], generated_image[i])
           print(f"SSIM for slice {i}: {slice_ssim}")