In [None]:
!pip install boto3 numpy nibabel matplotlib

In [None]:
import boto3
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import io
import os
import tempfile

# S3 location of the source .nii volumes.
bucket_name = 'chemocraft-data'
folder_path = 'Data/BraTS20_Training_369 copy/'

# Bucket handle shared by the fetch functions in this notebook.
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

# Local root directory under which save_nii_from_s3() writes its PNG slices.
local_path = 'slices'

# Crop offsets used by save_nii_from_s3(): how many array elements are
# trimmed from each edge of a slice before it is saved.
crop_left = 20
crop_right = 10
crop_top = 30
crop_bottom = 30

def render_nii_from_s3(filename, slice_idx):
    """
    Fetches an nii file from the S3 Bucket and displays one of its slices
    with matplotlib.

    filename:  name of the object inside folder_path (the prefix is added here)
    slice_idx: index of the slice to show along the third axis of the volume;
               negative values count from the end, as in normal array indexing

    Returns None. Problems with the volume itself (load failure, empty data,
    non-3D data, slice index out of range) are reported with print() rather
    than raised. Errors raised while fetching the object from S3 are not
    caught here.
    """
    print(f"Fetching file: {filename}")

    # The streaming body already yields the whole payload as bytes, so it can
    # be written to disk directly without an intermediate buffer.
    payload = bucket.Object(folder_path + filename).get()['Body'].read()

    # nib.load() is given a path, so the bytes are staged in a temporary file.
    # NOTE: re-opening a NamedTemporaryFile by name while it is still open
    # works on Unix but not on Windows (see the tempfile documentation).
    with tempfile.NamedTemporaryFile(suffix='.nii', delete=True) as temp_file:
        temp_file.write(payload)
        temp_file.flush()  # make sure the bytes are on disk before nibabel opens the path

        try:
            img = nib.load(temp_file.name)
            data = img.get_fdata()

            print(f"Data shape for {filename}: {data.shape}")

            if data.size == 0:
                print(f"No data found in {filename}")
                return

            if data.ndim < 3:
                print(f"Expected a 3D volume in {filename}, got shape {data.shape}")
                return

            # Report a bad index explicitly instead of letting the IndexError
            # surface as a misleading "Error loading file" message.
            depth = data.shape[2]
            if not -depth <= slice_idx < depth:
                print(f"Slice {slice_idx} is out of range for {filename} ({depth} slices)")
                return

            plt.figure(figsize=(6, 6))
            plt.imshow(data[:, :, slice_idx], cmap='gray')
            plt.title(f'{filename} - Slice {slice_idx}')
            plt.axis('off')  # Hide axes for cleaner display
            plt.show()
        except Exception as e:
            # Deliberately broad: this is a best-effort notebook helper, so any
            # remaining failure is reported and the notebook keeps running.
            print(f"Error loading file {filename}: {e}")

def save_nii_from_s3(filename):
    """
    Saves an nii file from the S3 Bucket as a directory of preprocessed png slices,
    within the specified local_path

    filename: name of the object inside folder_path (the prefix is added here)

    Each slice along the third axis is cropped with the global crop_* offsets
    and written as <local_path>/<filename without extension>/<slice index>.png
    using the 'gray' colormap.

    Returns None. Problems with the volume (load failure, empty data, non-3D
    data, crop offsets that leave nothing) and failures to write individual
    slices are reported with print() rather than raised. Errors raised while
    fetching the object from S3 are not caught here.
    """
    print(f"Fetching file: {filename}")

    # The streaming body already yields the whole payload as bytes, so it can
    # be written to disk directly without an intermediate buffer.
    payload = bucket.Object(folder_path + filename).get()['Body'].read()

    # nib.load() is given a path, so the bytes are staged in a temporary file.
    # NOTE: re-opening a NamedTemporaryFile by name while it is still open
    # works on Unix but not on Windows (see the tempfile documentation).
    with tempfile.NamedTemporaryFile(suffix='.nii', delete=True) as temp_file:
        temp_file.write(payload)
        temp_file.flush()  # make sure the bytes are on disk before nibabel opens the path

        try:
            img = nib.load(temp_file.name)
            data = img.get_fdata()

            # Validate before touching the filesystem so a bad volume does not
            # leave an empty output directory behind.
            if data.size == 0:
                print(f"No data found in {filename}")
                return

            if data.ndim < 3:
                print(f"Expected a 3D volume in {filename}, got shape {data.shape}")
                return

            start_y, end_y = crop_top, data.shape[0] - crop_bottom
            start_x, end_x = crop_left, data.shape[1] - crop_right

            # Offsets larger than the slice would make every crop empty; say so
            # once instead of failing on each slice individually.
            if end_y <= start_y or end_x <= start_x:
                print(f"Crop offsets leave nothing of {filename} (slice shape {data.shape[:2]})")
                return

            # splitext() removes only the final extension, so names containing
            # extra dots keep their full stem and do not collide.
            save_path = os.path.join(local_path, os.path.splitext(filename)[0])
            os.makedirs(save_path, exist_ok=True)
            print(f"Saving slices of {filename} in {save_path}")

            for slice_idx in range(data.shape[2]):
                cropped_slice = data[start_y:end_y, start_x:end_x, slice_idx]
                slice_path = os.path.join(save_path, f'{slice_idx}.png')
                try:
                    # NOTE: without vmin/vmax, imsave() takes the color limits
                    # from each array's own min/max, so every slice is scaled
                    # independently of the others.
                    mpimg.imsave(slice_path, cropped_slice, cmap='gray')
                except Exception as e:
                    # Best effort: one unwritable slice should not stop the rest.
                    print(f"Error saving file {slice_path}: {e}")

        except Exception as e:
            # Deliberately broad: this is a best-effort notebook helper, so any
            # remaining failure is reported and the next file can be processed.
            print(f"Error loading file {filename}: {e}")

# Convert every .nii object found under folder_path into a directory of PNG slices.
found_files = False
for obj in bucket.objects.filter(Prefix=folder_path):
    if not obj.key.endswith('.nii'):
        continue  # ignore anything that is not an nii volume

    found_files = True
    # save_nii_from_s3() prepends folder_path itself, so pass only the last
    # component of the key.
    filename = obj.key.rpartition('/')[2]
    save_nii_from_s3(filename)

if not found_files:
    print(f"No .nii files found in the folder {folder_path}")

# Sprint 1: Data Preprocessing (Theo)

This sprint was very straightforward. The goal was to create a directory containing one subdirectory of PNG files for each of the five sample brains, preprocessed and ready to use with a GAN model.

### Development

First, a new method `save_nii_from_s3()` was created, modeled after `render_nii_from_s3()`. Instead of selecting the middle slice of the 3D brain image, it iterates over every slice.

The first step of development was to save each slice as a PNG file. A local directory is specified as a global variable, and within it a directory with the name of the `.nii` file (without the extension) is created. Within this subdirectory, a PNG of each slice is saved using `matplotlib.image`, which had to be imported.

Next, to continue with preprocessing, each image slice needed to be cropped and saved in grayscale. Suitable cropping offsets were determined, set as global variables, and applied by taking an array slice of the image data. Grayscale conversion was easily accomplished by passing the keyword argument `cmap='gray'` to `matplotlib.image`'s `imsave()`.

### Moving Forward

The new `save_nii_from_s3()` can be used on the rest of the dataset once it is accessible, to create images for training a GAN model. The only consideration is whether the crop is too large for other images in the dataset; if so, this can easily be fixed by changing the offset variables.