# Brain Tumor Classification — End-to-End (Jupyter Notebook)

This notebook is a beginner-friendly, ready-to-run pipeline for brain tumor classification using PyTorch and transfer learning (ResNet18). 
It includes data loading, augmentation, model training, evaluation, and inference. **Do not use this for medical diagnosis.**

Instructions:
1. Place your dataset in the `data/` folder with subfolders `train/`, `val/`, `test/`, each containing class subfolders (e.g. `tumor/`, `no_tumor/`).
2. Install dependencies (cell below).
3. Run cells sequentially.


## 1) Install dependencies
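Run the following cell only if the packages are not already installed. In Colab or Jupyter, shell commands such as `pip install` must be prefixed with `!` (as in the commented lines below); keep the install in its own cell.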

In [None]:
# Optional setup cell: uncomment the pip lines below only when the packages
# are missing from the current environment.
# NOTE: torchaudio and pandas are listed here but are not imported by any of
# the cells shown in this notebook.
# !pip install torch torchvision torchaudio
# !pip install numpy pandas matplotlib scikit-learn pillow tqdm seaborn

# Reminder printed when the cell is run with the install lines still commented out.
print('Skip installation in managed envs (Colab/Local) if already installed.')

## 2) Imports

In [1]:
import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
print('Imports loaded')

Imports loaded


## 3) Data structure

Place your data as:
```
data/
  train/
    tumor/
    no_tumor/
  val/
    tumor/
    no_tumor/
  test/
    tumor/
    no_tumor/
```
Each class folder should contain images (jpg/png). If your images are not yet split, divide them into train/val/test (for example 70/15/15) before running, and use the same class subfolder names in all three splits.


## 4) Transforms & DataLoaders

In [4]:
def get_transforms(img_size=224):
    """Build the torchvision preprocessing pipelines.

    Both pipelines resize to ``img_size`` x ``img_size``, convert to a tensor
    and normalise with the same per-channel mean/std (the values
    conventionally used with ImageNet-pretrained torchvision models). Only
    the training pipeline adds random flip / rotation / colour-jitter
    augmentation.

    Returns:
        A ``(train_transforms, val_transforms)`` tuple of ``Compose`` objects.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    target_size = (img_size, img_size)

    def tensor_steps():
        # Fresh transform instances for each pipeline.
        return [
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ]

    augmentations = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
    ]

    train_transforms = transforms.Compose(
        [transforms.Resize(target_size), *augmentations, *tensor_steps()]
    )
    val_transforms = transforms.Compose(
        [transforms.Resize(target_size), *tensor_steps()]
    )
    return train_transforms, val_transforms


def create_dataloaders(data_dir, batch_size=32, img_size=224, num_workers=2):
    """Create train/val/test ``DataLoader`` objects from ``data_dir``.

    ``data_dir`` must contain ``train``, ``val`` and ``test`` sub-directories
    that ``ImageFolder`` can read. Only the training loader shuffles and uses
    the augmenting transform. The class names are taken from the training
    dataset and printed.

    Returns:
        ``(train_loader, val_loader, test_loader, class_names)``.
    """
    train_tf, eval_tf = get_transforms(img_size)

    # (sub-directory, transform, shuffle) in train / val / test order.
    split_specs = (
        ('train', train_tf, True),
        ('val', eval_tf, False),
        ('test', eval_tf, False),
    )

    # Build every dataset first, then wrap each one in a loader.
    datasets = [
        ImageFolder(os.path.join(data_dir, split), transform=tf)
        for split, tf, _ in split_specs
    ]
    train_loader, val_loader, test_loader = (
        DataLoader(ds, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
        for ds, (_, _, shuffle) in zip(datasets, split_specs)
    )

    class_names = datasets[0].classes
    print(f'Found classes: {class_names}')

    return train_loader, val_loader, test_loader, class_names

# quick check (won't run until data is present)
# train_loader, val_loader, test_loader, class_names = create_dataloaders('data', batch_size=8)


## 5) Model (ResNet18 transfer learning)

In [5]:
def build_model(num_classes, pretrained=True):
    """Return a ResNet18 whose final layer outputs ``num_classes`` logits.

    Args:
        num_classes: Output size of the replacement ``fc`` layer.
        pretrained: If true, start from torchvision's ImageNet weights;
            if false, use random initialisation (appropriate when a saved
            checkpoint is loaded immediately afterwards).

    Returns:
        The torchvision ResNet18 module with a new ``nn.Linear`` head.
    """
    weights_enum = getattr(models, 'ResNet18_Weights', None)
    if weights_enum is not None:
        # torchvision >= 0.13 deprecates ``pretrained=`` in favour of
        # ``weights=``; IMAGENET1K_V1 is what ``pretrained=True`` selected.
        weights = weights_enum.IMAGENET1K_V1 if pretrained else None
        model = models.resnet18(weights=weights)
    else:
        # Older torchvision releases without the weights enums.
        model = models.resnet18(pretrained=pretrained)

    # Swap the 1000-way ImageNet head for one sized to this task.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

# Example: model = build_model(2)


## 6) Training function

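This training loop saves the model with the best validation accuracy to `outputs/best_model.pth` as training progresses. When training finishes, it reloads those best weights and also writes them to `outputs/final_model.pth`.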

In [11]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch (with a tqdm progress bar). Without one the model is put in
    eval mode and gradients are disabled.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    batches = tqdm(loader) if is_training else loader

    with torch.set_grad_enabled(is_training):
        for inputs, labels in batches:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by batch size so a smaller final
            # batch does not skew the epoch average.
            n_samples = inputs.size(0)
            loss_sum += loss.item() * n_samples
            correct += torch.sum(preds == labels).item()
            seen += n_samples

    return loss_sum / seen, correct / seen


def train(data_dir='data', epochs=10, batch_size=32, lr=1e-3, img_size=224, device=None, out_dir='outputs'):
    """Fine-tune ResNet18 on ``data_dir`` and keep the best checkpoint.

    Args:
        data_dir: Root folder containing ``train``/``val``/``test`` splits.
        epochs: Number of passes over the training split.
        batch_size: Batch size for all loaders.
        lr: Initial Adam learning rate (halved by ``ReduceLROnPlateau`` when
            the validation loss stops improving).
        img_size: Side length images are resized to.
        device: Torch device string; defaults to CUDA when available.
        out_dir: Directory for ``best_model.pth`` and ``final_model.pth``
            (created if missing).

    Returns:
        ``(model, class_names)`` where ``model`` holds the weights of the
        epoch with the highest validation accuracy.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    os.makedirs(out_dir, exist_ok=True)
    best_path = os.path.join(out_dir, 'best_model.pth')
    final_path = os.path.join(out_dir, 'final_model.pth')

    # The test split is not used during training.
    train_loader, val_loader, _, class_names = create_dataloaders(data_dir, batch_size, img_size=img_size)
    num_classes = len(class_names)

    model = build_model(num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=3)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")

        epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_epoch_loss, val_epoch_acc = _run_epoch(model, val_loader, criterion, device)

        print(f"Train loss: {epoch_loss:.4f} acc: {epoch_acc:.4f} | Val loss: {val_epoch_loss:.4f} acc: {val_epoch_acc:.4f}")

        # The plateau scheduler is driven by validation loss.
        scheduler.step(val_epoch_loss)

        # Always checkpoint the first epoch so best_model.pth exists even if
        # validation accuracy never rises above 0; afterwards only on a
        # strict improvement.
        if epoch == 1 or val_epoch_acc > best_acc:
            best_acc = val_epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), best_path)

    print(f"Training complete. Best val acc: {best_acc:.4f}")

    # Restore the best weights before returning and saving the final copy.
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), final_path)

    return model, class_names


In [15]:
# Train for 5 epochs on ./data; keeps the best-validation model and its class names.
model, class_names = train(data_dir="data", epochs=5)

Found classes: ['no', 'yes']




Epoch 1/5


100%|██████████| 94/94 [08:17<00:00,  5.29s/it] 


Train loss: 0.1900 acc: 0.9337 | Val loss: 0.0479 acc: 0.9762
Epoch 2/5


100%|██████████| 94/94 [08:11<00:00,  5.23s/it]


Train loss: 0.0728 acc: 0.9713 | Val loss: 0.2318 acc: 0.9286
Epoch 3/5


100%|██████████| 94/94 [07:17<00:00,  4.65s/it]


Train loss: 0.0916 acc: 0.9693 | Val loss: 0.0624 acc: 0.9762
Epoch 4/5


100%|██████████| 94/94 [12:03<00:00,  7.70s/it]


Train loss: 0.0396 acc: 0.9870 | Val loss: 0.0655 acc: 0.9762
Epoch 5/5


100%|██████████| 94/94 [07:46<00:00,  4.96s/it]


Train loss: 0.0563 acc: 0.9803 | Val loss: 0.0166 acc: 1.0000
Training complete. Best val acc: 1.0000


## 7) Evaluation on test set

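This cell defines `evaluate`, which prints a classification report and plots a confusion matrix for the test set. Call it with the path to a saved checkpoint, e.g. `evaluate('outputs/best_model.pth')`.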

In [16]:
def evaluate(model_path, data_dir='data', batch_size=32, img_size=224, device=None):
    """Evaluate a saved checkpoint on the test split.

    Prints scikit-learn's classification report and shows a confusion-matrix
    heat map. Returns ``None``.

    Args:
        model_path: Path to a ``state_dict`` file saved with ``torch.save``.
        data_dir: Root folder containing ``train``/``val``/``test`` splits.
        batch_size: Batch size for the test loader.
        img_size: Side length images are resized to.
        device: Torch device string; defaults to CUDA when available.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Only the test loader is needed; class names come from the train split.
    _, _, test_loader, class_names = create_dataloaders(data_dir, batch_size, img_size=img_size)
    num_classes = len(class_names)

    # The checkpoint overwrites every weight, so do not fetch ImageNet
    # weights first (that would need network access for no benefit).
    model = build_model(num_classes, pretrained=False)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)
            # Labels were never moved off the CPU; predictions may be on GPU.
            y_true.extend(labels.tolist())
            y_pred.extend(preds.cpu().tolist())

    # Pin the label set so the report and matrix always have one entry per
    # class, even when a class is missing from the test data or predictions.
    # Otherwise target_names / tick labels would not line up.
    label_ids = list(range(num_classes))

    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plt.figure(figsize=(8,6))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()


## 8) Single-image inference

In [None]:
from PIL import Image

def load_image(image_path, img_size=224):
    """Load one image file as a normalised single-item batch tensor.

    The image is converted to RGB and passed through the validation
    pipeline from ``get_transforms``, so inference preprocessing cannot
    drift from what evaluation uses.

    Args:
        image_path: Path to an image file readable by PIL.
        img_size: Side length the image is resized to.

    Returns:
        A tensor of shape ``(1, 3, img_size, img_size)``.
    """
    _, eval_tf = get_transforms(img_size)
    # The context manager releases the file handle as soon as the pixels
    # have been read.
    with Image.open(image_path) as img:
        return eval_tf(img.convert('RGB')).unsqueeze(0)


def predict(image_path, model_path, num_classes, class_names, device=None, img_size=224):
    """Classify a single image with a saved checkpoint.

    Args:
        image_path: Path to the image to classify.
        model_path: Path to a ``state_dict`` file saved with ``torch.save``.
        num_classes: Number of output classes the checkpoint was trained with.
        class_names: Display names indexed by class id; the order must match
            the class order used in training.
        device: Torch device string; defaults to CUDA when available.
        img_size: Side length the image is resized to; pass the value used
            for training.

    Returns:
        ``(label, confidence)``: the predicted class name and its softmax
        probability as a float.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The checkpoint overwrites every weight, so do not fetch ImageNet
    # weights first.
    model = build_model(num_classes, pretrained=False)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    img_t = load_image(image_path, img_size).to(device)

    with torch.no_grad():
        outputs = model(img_t)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        conf, pred = torch.max(probs, 1)

    return class_names[pred.item()], conf.item()

# Example usage (run after training):
# label, conf = predict('path/to/img.jpg', 'outputs/best_model.pth', num_classes=2, class_names=['no_tumor','tumor'])
# print(label, conf)


In [18]:
# Example usage (run after training):
# - The image path below is machine-specific; replace it with a path on your system.
# - This image comes from the training split, so the confidence printed here
#   is not an estimate of performance on unseen data.
# - class_names are display labels indexed by class id. Training reported
#   classes ['no', 'yes'], so index 0 -> 'no_tumor' and index 1 -> 'tumor';
#   keep this order aligned with the training class order.
label, conf = predict(r'C:\Users\Dell\Downloads\Brain_tumor\data\train\no\No12.jpg', 'outputs/best_model.pth', num_classes=2, class_names=['no_tumor','tumor'])
print(label, conf)



no_tumor 0.9894247055053711


## 9) Tips & next steps

- Start with a small subset of data to quickly validate the pipeline.
- Use GPU if available (Colab provides free GPUs).
- If classes are imbalanced, consider `torch.utils.data.WeightedRandomSampler` or class weights in loss.
- Try stronger augmentations, or different backbones (ResNet34, EfficientNet) for better accuracy.

---

Happy training! If you want, I can also create a Colab-ready notebook link or a ZIP of all project files.
