<a href="https://colab.research.google.com/github/upashanadutta23/DLPROJECT/blob/main/DL_CV_DATASET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Install hugging face datasets
!pip install datasets
!pip install nibabel
!pip install torch torchvision

In [None]:
#Importing necessary Libraries
import nibabel as nib
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from datasets import load_dataset,load_from_disk


In [None]:
#Processing the scanned Brain Data
def preprocess_nifti(example, target_size=96):
  """Load one NIfTI scan and convert it into a fixed-size cubic volume.

  Pipeline: crop a fixed sub-volume, shift and scale the intensities,
  downsample once with trilinear interpolation, then zero-pad every spatial
  axis (as evenly as possible on both sides) up to ``target_size``.

  Args:
    example: Mapping with an ``"nii_filepath"`` entry that ``nib.load`` can open.
    target_size: Edge length of the output cube. Defaults to 96.

  Returns:
    The same ``example`` object, with an added ``"img"`` entry holding a NumPy
    array of shape ``(1, target_size, target_size, target_size)``.
  """
  nii_path = example["nii_filepath"]
  #Load the volume data
  vol = nib.load(nii_path).get_fdata()
  #cropping the sub volume
  vol = vol[7:105, 8:132, :108] #(98,124,108) for a full-size scan
  #shifting intensities so that the minimum is not negative
  vol = vol + abs(vol.min())
  #normalising so the maximum is 1; an all-zero volume is left untouched
  #because dividing by a zero maximum would fill it with NaNs
  peak = vol.max()
  if peak > 0:
    vol = vol / peak
  #converting it to torch tensor (1,1,D,H,W), the layout F.interpolate expects
  t_tensor = torch.from_numpy(vol).unsqueeze(0).unsqueeze(0)
  #scale factor maps the longest cropped axis (124) onto target_size
  scale_factor = target_size / 124
  #downsampling the tensor (done exactly once)
  downsampled = F.interpolate(
    t_tensor,
    scale_factor=(scale_factor, scale_factor, scale_factor),
    mode="trilinear",
    align_corners=False,
  )
  #symmetric padding up to target_size on every spatial axis
  _, _, d, h, w = downsampled.shape
  padding = []
  #F.pad lists (before, after) pairs starting from the LAST dimension: W, H, D
  for size in (w, h, d):
    missing = target_size - size   #full deficit for this axis
    before = missing // 2
    padding.extend((before, missing - before))  #odd deficits put the extra voxel after
  final_image = F.pad(downsampled, tuple(padding))  #shape = (1,1,T,T,T)
  #drop the batch axis, keep the channel axis -> (1,T,T,T)
  final_image = final_image.squeeze(0)
  #Storing the image as numpy
  example["img"] = final_image.numpy()
  return example


In [None]:
##Fetch the three splits of the brain-structure dataset from Hugging Face.
##NOTE: trust_remote_code=True lets code shipped with the dataset repository
##run locally - only keep it for repositories you trust.
ds_train, ds_test, ds_val = (
    load_dataset("radiata-ai/brain-structure", split=split_name, trust_remote_code=True)
    for split_name in ("train", "test", "validation")
)


In [None]:
##Run preprocess_nifti over every example of each split (train, test, val in that order)
ds_train, ds_test, ds_val = (
    _split.map(preprocess_nifti) for _split in (ds_train, ds_test, ds_val)
)
#Expose only the 'img' column, returned as PyTorch tensors
for _split in (ds_train, ds_test, ds_val):
    _split.set_format(type="torch", columns=["img"])
del _split  #keep the notebook namespace free of the loop variable


In [None]:
##Write each processed split to its own folder so it can be uploaded later
for _out_dir, _split in (
    ("exported_brain_images/train", ds_train),
    ("exported_brain_images/test", ds_test),
    ("exported_brain_images/val", ds_val),
):
    _split.save_to_disk(_out_dir)
del _out_dir, _split  #keep the notebook namespace free of the loop variables


In [None]:
#Read the exported splits back from disk (train, test, val in that order)
ds_train, ds_test, ds_val = map(
    load_from_disk,
    (
        "exported_brain_images/train",
        "exported_brain_images/test",
        "exported_brain_images/val",
    ),
)