In [1]:
import torch
import json
import os
import shutil
import random

In [2]:
# Report whether PyTorch can use a CUDA device; the boolean is shown as the cell output.
torch.cuda.is_available()

True

In [3]:
# Project root is the working directory at the time this cell runs;
# the dataset folders are resolved relative to it.
PROJECT_ROOT = os.getcwd()

In [4]:
# source_data_dir: folder holding the original per-class image folders.
# target_data_dir: folder that receives the binary (tumor / no_tumor) copy.
source_data_dir, target_data_dir = (
    os.path.join(PROJECT_ROOT, dataset_folder)
    for dataset_folder in (
        'brain tumor extention data',
        'brain tumor extention binary data',
    )
)

In [5]:
# Make sure both binary class folders exist under target_data_dir
# (no error if they are already there).
for class_name in ('tumor', 'no_tumor'):
    os.makedirs(os.path.join(target_data_dir, class_name), exist_ok=True)

In [6]:
# Merge the three tumor classes ('glioma_tumor', 'meningioma_tumor',
# 'pituitary_tumor') into the single binary class folder 'tumor'.
# `no_tumor` lists the source folders of the 'no_tumor' class; it is only
# defined here and consumed by the cell that copies the no_tumor images.
from tqdm import tqdm

tumor = ['glioma_tumor', 'meningioma_tumor', 'pituitary_tumor']
no_tumor = ['no_tumor']

# Destination filename -> source folder. Three folders are merged into one, so
# two classes shipping a file with the same name would otherwise silently
# overwrite each other.
tumor_copied_from = {}

for folder in tumor:
    folder_path = os.path.join(source_data_dir, folder)
    # Sorted so that any collision renaming is deterministic across runs.
    for filename in tqdm(sorted(os.listdir(folder_path)), desc= f"Copying {folder} to tumor"):
        src = os.path.join(folder_path, filename)
        if not os.path.isfile(src):
            # Skip sub-directories (e.g. notebook checkpoint folders); copy2 only handles files.
            continue
        # Keep the original name unless another class already used it in this run.
        dst_name = filename if filename not in tumor_copied_from else f"{folder}_{filename}"
        tumor_copied_from[dst_name] = folder
        dst = os.path.join(target_data_dir, 'tumor', dst_name)
        # copy2 also preserves file metadata (timestamps), which plain shutil.copy
        # does not; this matches the no_tumor copy cell.
        shutil.copy2(src, dst)

Copying glioma_tumor to tumor: 100%|██████████| 826/826 [00:00<00:00, 14160.68it/s]
Copying meningioma_tumor to tumor: 100%|██████████| 822/822 [00:00<00:00, 14557.34it/s]
Copying pituitary_tumor to tumor: 100%|██████████| 827/827 [00:00<00:00, 13703.30it/s]


In [7]:
# Copy every entry of each folder listed in `no_tumor` into
# target_data_dir/no_tumor with shutil.copy2, one progress bar per source folder.
for folder in no_tumor:
    class_dir = os.path.join(source_data_dir, folder)
    entries = tqdm(os.listdir(class_dir), desc = f'Copying {folder} to no_tumor')
    for entry in entries:
        shutil.copy2(
            os.path.join(class_dir, entry),
            os.path.join(target_data_dir, 'no_tumor', entry),
        )

Copying no_tumor to no_tumor: 100%|██████████| 395/395 [00:00<00:00, 11404.39it/s]


In [8]:
# Built from the same expression as target_data_dir (the binary dataset folder),
# bound to a second name.
root = os.path.join(PROJECT_ROOT, 'brain tumor extention binary data')

In [9]:
# Configuration for splitting the binary dataset into 'train', 'validation' and 'test'.
# Seed Python's global RNG so the later random.shuffle starts from a fixed state.
random.seed(42)

source_dirs = ['tumor', 'no_tumor']  # class folders read from target_data_dir
target_dirs = ['train', 'validation', 'test']  # split folders to create
# Train and validation fractions are read by index (0 and 1); the test split
# receives whatever files remain.
split_ratio = [0.7, 0.15, 0.15]


In [11]:
# NOTE(review): scratch cell — this loop only rebinds `category` and
# `category_path` and has no other effect; the split cell that follows
# recomputes both. Candidate for deletion.
for category in source_dirs:
    category_path = os.path.join(target_data_dir, category)
   

In [12]:
# Split every class folder of the binary dataset into train / validation / test
# and copy the files into <split>/<class>/ folders.
#
# Re-seed here (not only in the configuration cell) so that re-running this
# cell on its own reproduces exactly the same split instead of continuing from
# whatever state the global RNG happens to be in; a different split on re-run
# would leave the same image in more than one split folder.
random.seed(42)

for category in source_dirs:
    category_path = os.path.join(target_data_dir, category)
    # os.listdir returns entries in arbitrary order, so sort before shuffling;
    # otherwise the seed alone does not make the split reproducible.
    # Sub-directories are ignored because shutil.copy only handles files.
    files = sorted(
        name for name in os.listdir(category_path)
        if os.path.isfile(os.path.join(category_path, name))
    )
    random.shuffle(files)
    n_total = len(files)
    n_train = int(n_total * split_ratio[0])
    n_val = int(n_total * split_ratio[1])

    # The test split takes whatever remains after train and validation, so the
    # truncation done by int() never drops a file.
    splits = {
        "train": files[:n_train],
        "validation": files[n_train: n_train + n_val],
        "test": files[n_train + n_val:]
    }
    assert sum(len(names) for names in splits.values()) == n_total, "split lost or duplicated files"

    for split in target_dirs:
        # NOTE(review): relative path, so the split folders are created in the
        # current working directory rather than under target_data_dir — confirm intended.
        target_folder = os.path.join(split, category)
        os.makedirs(target_folder, exist_ok = True)
        for file in splits[split]:
            src = os.path.join(category_path, file)
            dst = os.path.join(target_folder, file)
            shutil.copy(src, dst)

# now create train (tumor, no_tumor) folders; validation (tumor, no_tumor) folders; test(tumor, no_tumor) folders.