In [17]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, datasets
from PIL import Image
import os
import torch.nn as nn 
import numpy as np

## Data Preprocessing

In [18]:
class ImageDataset(Dataset):
    """Binary image-classification dataset built from two folders.

    Every regular file directly inside ``folder1`` is labelled ``1`` and every
    regular file directly inside ``folder2`` is labelled ``0``; subdirectories
    are ignored. Samples are stored with all ``folder1`` files first, and each
    folder's files in sorted name order, so a given index always refers to the
    same file (``os.listdir`` alone returns entries in arbitrary order).

    Args:
        folder1: Directory whose files receive label 1.
        folder2: Directory whose files receive label 0.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``. When ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, folder1, folder2, transform=None):
        self.folder1 = folder1
        self.folder2 = folder2
        self.transform = transform
        self.image_files = []  # full path of every sample
        self.labels = []       # label (1 or 0) aligned index-for-index with image_files

        for folder, label in ((folder1, 1), (folder2, 0)):
            paths = self._list_files(folder)
            self.image_files.extend(paths)
            self.labels.extend([label] * len(paths))

    @staticmethod
    def _list_files(folder):
        """Return the paths of the regular files directly inside ``folder``, sorted by name."""
        candidates = (os.path.join(folder, name) for name in sorted(os.listdir(folder)))
        return [path for path in candidates if os.path.isfile(path)]

    def __len__(self):
        """Return the total number of samples across both folders."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as an ``(image, label)`` pair.

        The file is opened with PIL and converted to RGB; ``transform`` is
        applied to the converted image when one was supplied.
        """
        img_name = self.image_files[idx]
        label = self.labels[idx]

        # The context manager releases the file handle promptly instead of
        # relying on garbage collection; convert() returns an independent image.
        with Image.open(img_name) as img:
            image = img.convert("RGB")

        # Compare against None so a callable that happens to be falsy is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [19]:
# Build the full dataset: ImageDataset labels files from `folder1` as 1 and
# files from `folder2` as 0, so "Brain Tumor" is the positive class.
# NOTE(review): with transform=None each sample is a raw PIL image; the default
# DataLoader collate function cannot batch PIL images, so a tensor-producing
# transform presumably has to be attached before iterating — confirm.
dataset = ImageDataset(
    folder1='./data/Brain Tumor',
    folder2='./data/Healthy',
    transform=None,
)

In [20]:
# Split into train, val, test (70%, 20%, 10%)

train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
# The test split takes the remainder so the three sizes always sum to len(dataset),
# even when the 70% / 20% products are truncated by int().
test_size = len(dataset) - train_size - val_size

# Seed the split explicitly: without a generator, random_split draws from the
# global RNG and every kernel restart assigns different samples to each subset,
# which makes validation/test metrics incomparable between runs.
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


In [21]:
# Wrap each split in a DataLoader that yields mini-batches of `batch_size` samples.
batch_size = 16

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation and test loaders keep a fixed sample order.
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)