In [None]:
# %% Imports and global paths

import random
import shutil
from pathlib import Path
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from ultralytics import YOLO

%matplotlib inline


# %% Paths and basic settings

# NOTE: launch this notebook from inside the YOLO11 folder (the one that holds
# YOLO11.ipynb); everything below assumes Path.cwd() == .../YOLO11.
ROOT_DIR = Path.cwd()

# Base dataset: root folder and its data.yaml
DATA_ROOT = Path(r"E:\2025 fall\Fundamentals of Digital Image Processing")
DATA_YAML = DATA_ROOT.joinpath("data.yaml")

# Finetune dataset: root folder; its yaml is kept under DATA_ROOT, next to data.yaml
FT_DATA_ROOT = Path(r"E:\2025 fall\Fundamentals of Digital Image Processing\Low Resolution_fine tune")
FT_DATA_YAML = DATA_ROOT.joinpath("data_finetune.yaml")

# Run names double as folder names under ROOT_DIR
RUN_NAME = "results"
FT_RUN_NAME = "finetune_motion blur"

RUN_DIR = ROOT_DIR.joinpath(RUN_NAME)
FT_RUN_DIR = ROOT_DIR.joinpath(FT_RUN_NAME)

# Only the base run folder is created here; FT_RUN_DIR is not created by this cell.
RUN_DIR.mkdir(parents=True, exist_ok=True)

print(f"ROOT_DIR : {ROOT_DIR}")
print(f"DATA_YAML: {DATA_YAML}")
print(f"RUN_DIR  : {RUN_DIR}")
print(f"Fine-Tune_YAML: {FT_DATA_YAML}")


In [None]:
# %% Helper functions: path resolving and train subsampling

def resolve_path(base: Path, p: str) -> Path:
    """
    Make a path taken from data.yaml absolute.

    An absolute ``p`` is returned untouched; a relative ``p`` is joined onto
    ``base`` and resolved.
    """
    candidate = Path(p)
    return candidate if candidate.is_absolute() else (base / candidate).resolve()


def fix_split_path(cfg: dict, base_dir: Path, key: str) -> Optional[Path]:
    """
    Fix cfg[key] (train/val/test) to a valid absolute path.

    Priority:
        1. Use the path in data.yaml if it is non-empty and exists.
        2. Try base_dir/<key>/images.
        3. Try base_dir/<key>.

    Args:
        cfg: Parsed data.yaml mapping. Updated in place: on success cfg[key]
            becomes the chosen path as a string.
        base_dir: Folder that relative paths and the fallbacks are anchored to.
        key: Split name to fix ("train", "val" or "test").

    Returns:
        The resolved path, or None when the key is absent from cfg or no
        candidate exists (cfg[key] is left unchanged in that case).
    """
    if key not in cfg:
        return None

    raw = cfg[key]

    # An empty entry carries no path: ``test:`` with no value loads as None
    # (Path(None) raises TypeError) and "" would resolve to base_dir itself
    # and be accepted as the split folder. Skip step 1 for both.
    has_value = raw is not None and str(raw).strip() != ""

    if has_value:
        path = resolve_path(base_dir, raw)
        if path.exists():
            cfg[key] = str(path)
            print(f"[INFO] Using {key} path from yaml: {path}")
            return path
        problem = f"{key} path {path} not found"
    else:
        problem = f"{key} path is empty in yaml"

    # Fallbacks, tried in priority order.
    for cand in (base_dir / key / "images", base_dir / key):
        if cand.exists():
            cfg[key] = str(cand)
            print(f"[WARN] {problem}, using fallback {cand}")
            return cand

    print(f"[ERROR] Could not resolve path for {key}: {raw}")
    return None


def _labels_dir_for(images_dir: Path) -> Path:
    """
    Return the labels folder that pairs with ``images_dir``.

    Lookup order:
        1. <images_dir>/../labels  (layout: train/images + train/labels).
        2. images_dir with its last "images" component replaced by "labels"
           (layout: images/train + labels/train).
        3. Fall back to (1) even if it is missing, so the caller's
           "label not found" warnings point at a consistent location.
    """
    sibling = images_dir.parent / "labels"
    if sibling.is_dir():
        return sibling

    parts = images_dir.parts
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx].lower() == "images":
            swapped = Path(*parts[:idx], "labels", *parts[idx + 1:])
            if swapped.is_dir():
                return swapped
            break  # only the last "images" component is considered

    return sibling


def create_subsampled_train(
    data_yaml: Path,
    max_train_images: int = 3000,
    seed: int = 42,
) -> Path:
    """
    Read the original data.yaml, fix train/val/test paths,
    create a subsampled train set in a train_small folder next to data.yaml,
    and write a new data_subsampled.yaml next to data.yaml.

    Rules:
        - Only sample once: if train_small/images is non-empty, reuse it
          (max_train_images and seed are then ignored).
        - Sort file names + use a fixed random seed -> deterministic subset.
        - For each selected image, copy the matching .txt label.

    Args:
        data_yaml: Path to the original dataset yaml.
        max_train_images: Upper bound on the number of sampled train images.
        seed: Seed for the sampling RNG.

    Returns:
        Path of the written data_subsampled.yaml.

    Raises:
        RuntimeError: If no train path can be resolved, or the train images
            folder contains no supported image files.
    """
    data_yaml_path = Path(data_yaml)
    with data_yaml_path.open("r", encoding="utf-8") as f:
        # An empty yaml loads as None; treat it as an empty config so the
        # failure below is the explicit "no train path" error.
        cfg = yaml.safe_load(f) or {}

    base_dir = data_yaml_path.parent

    # Fix paths for all splits (fix_split_path rewrites cfg in place).
    train_path = fix_split_path(cfg, base_dir, "train")
    fix_split_path(cfg, base_dir, "val")
    fix_split_path(cfg, base_dir, "test")  # keep test for later evaluation

    if train_path is None:
        raise RuntimeError("Could not resolve a valid train path from data.yaml")

    # The train path may be the split folder itself; prefer its images/ child.
    train_images_dir = Path(train_path)
    if train_images_dir.name.lower() != "images":
        nested = train_images_dir / "images"
        if nested.exists():
            train_images_dir = nested

    # Supports both train/images + train/labels and images/train + labels/train.
    train_labels_dir = _labels_dir_for(train_images_dir)

    # Subset folders: <yaml dir>/train_small/images, .../labels
    subset_root = base_dir / "train_small"
    subset_images_dir = subset_root / "images"
    subset_labels_dir = subset_root / "labels"
    subset_images_dir.mkdir(parents=True, exist_ok=True)
    subset_labels_dir.mkdir(parents=True, exist_ok=True)

    existing_imgs = list(subset_images_dir.glob("*.*"))
    if existing_imgs:
        print(
            f"[INFO] Subsampled train already exists at {subset_images_dir} "
            f"with {len(existing_imgs)} images. Reusing it."
        )
    else:
        # Sorting by name before sampling keeps the subset deterministic
        # regardless of directory listing order.
        all_imgs = sorted(
            (
                p for p in train_images_dir.iterdir()
                if p.is_file() and p.suffix.lower() in {".jpg", ".jpeg", ".png", ".bmp"}
            ),
            key=lambda x: x.name,
        )
        n_total = len(all_imgs)
        if n_total == 0:
            raise RuntimeError(f"No images found in {train_images_dir}")

        n_select = min(max_train_images, n_total)
        print(f"[INFO] Found {n_total} train images, sampling {n_select} for subset.")

        rng = random.Random(seed)
        selected = rng.sample(all_imgs, n_select)

        missing_labels = 0
        for img_path in selected:
            shutil.copy2(img_path, subset_images_dir / img_path.name)

            label_src = train_labels_dir / f"{img_path.stem}.txt"
            if label_src.exists():
                shutil.copy2(label_src, subset_labels_dir / label_src.name)
            else:
                missing_labels += 1
                print(f"[WARN] Label not found for {img_path.name}, skipping label.")

        if missing_labels:
            print(
                f"[WARN] {missing_labels}/{len(selected)} selected images had no "
                f"label file in {train_labels_dir}."
            )

        print(
            f"[INFO] Subsampled train created at {subset_images_dir} "
            f"with {len(selected)} images."
        )

    # Build new yaml config: only "train" changes; "val" and "test" keep the
    # values fix_split_path already wrote into cfg.
    new_cfg = dict(cfg)
    new_cfg["train"] = str(subset_images_dir)

    new_yaml_path = data_yaml_path.with_name("data_subsampled.yaml")
    with new_yaml_path.open("w", encoding="utf-8") as f:
        yaml.safe_dump(new_cfg, f, sort_keys=False, allow_unicode=True)

    print(f"[INFO] New data yaml written to {new_yaml_path}")
    return new_yaml_path



In [None]:
# %% Training (set DO_TRAIN = False if you only want to reuse existing results)

DO_TRAIN = False   # flip to True to launch a fresh training run

# Settings of the base run
EPOCHS = 100
IMGSZ = 640
BATCH = 16
MODEL_NAME = "yolo11n.pt"  # checkpoint the run starts from; swap to pick another model version

if not DO_TRAIN:
    print("Skipping training, reusing existing run:", RUN_DIR)
else:
    # Build (or reuse) the capped train subset and get the yaml that points at it.
    subsampled_yaml = create_subsampled_train(
        data_yaml=DATA_YAML, max_train_images=3000, seed=42
    )

    model = YOLO(MODEL_NAME)

    # project + name are ROOT_DIR and RUN_NAME, the same pieces RUN_DIR is built from.
    model.train(
        data=str(subsampled_yaml),
        project=str(ROOT_DIR),
        name=RUN_NAME,
        exist_ok=True,
        epochs=EPOCHS,
        imgsz=IMGSZ,
        batch=BATCH,
        workers=2,
        plots=False,
    )


In [None]:
# %% Finetune YOLO11
# YOLO is imported in the first (imports) cell of this notebook.

DO_FINETUNE = True

FT_EPOCHS = 40
FT_IMGSZ = 640
FT_BATCH = 16


if DO_FINETUNE:
    # Start from the best checkpoint of the base run. Built from RUN_DIR so it
    # follows RUN_NAME instead of a second hard-coded "results" folder name.
    base_weights = RUN_DIR / "weights" / "best.pt"

    # Fail early with a clear message, like the model-loading cells do.
    assert base_weights.exists(), f"Base best.pt not found at {base_weights}"
    assert FT_DATA_YAML.exists(), f"Finetune data yaml not found at {FT_DATA_YAML}"

    print("Finetune base weights:", base_weights)
    model_ft = YOLO(str(base_weights))

    # project + name are ROOT_DIR and FT_RUN_NAME, the same pieces FT_RUN_DIR is built from.
    model_ft.train(
        data=str(FT_DATA_YAML),
        epochs=FT_EPOCHS,
        imgsz=FT_IMGSZ,
        batch=FT_BATCH,
        workers=2,
        project=str(ROOT_DIR),
        name=FT_RUN_NAME,
        exist_ok=True,
        plots=False,
    )
else:
    print("Skipping finetune training, reusing existing run:", FT_RUN_DIR)

In [None]:
# %% Load best model from the training run

# best.pt is looked up in the weights/ folder of the base run directory.
BEST_WEIGHTS = RUN_DIR.joinpath("weights", "best.pt")
assert BEST_WEIGHTS.exists(), f"best.pt not found at {BEST_WEIGHTS}"

best_model = YOLO(str(BEST_WEIGHTS))
print(f"Loaded model from: {BEST_WEIGHTS}")


In [None]:
# %% Load best model from the fine-tuning run

# best.pt is looked up in the weights/ folder of the finetune run directory.
FT_BEST_WEIGHTS = FT_RUN_DIR.joinpath("weights", "best.pt")
assert FT_BEST_WEIGHTS.exists(), f"Finetune best.pt not found at {FT_BEST_WEIGHTS}"

best_model_ft = YOLO(str(FT_BEST_WEIGHTS))
print(f"Loaded finetuned weights from: {FT_BEST_WEIGHTS}")

In [None]:
# %% Evaluate on validation set (with plots)

metrics_val = best_model.val(
    data=str(DATA_ROOT / "data_subsampled.yaml"),  # or DATA_YAML if you prefer full train
    split="val",
    workers=2,
    plots=True,   # keep the YOLO-generated plots and val_batch* images
)

box_val = metrics_val.box

map_5095_val = float(box_val.map)       # mAP@[.5:.95]
map_50_val   = float(box_val.map50)     # mAP@0.5
map_75_val   = float(box_val.map75)     # mAP@0.75

# mean precision/recall/F1 over classes
mp_val  = float(box_val.mp)
mr_val  = float(box_val.mr)
# Per the Ultralytics Metric docs, .f1 holds one F1 score per class, so its
# mean is the class-averaged F1. It is initialised to an empty list (which has
# no .mean()), so report 0.0 in that case, matching how mp/mr behave.
mf1_val = float(box_val.f1.mean()) if len(box_val.f1) else 0.0

print("\n=== Validation metrics (boxes, B) ===")
print(f"AP@[.5:.95](B): {map_5095_val:.4f}")
print(f"AP@0.5(B)    : {map_50_val:.4f}")
print(f"AP@0.75(B)   : {map_75_val:.4f}")
print(f"mean P(B)     : {mp_val:.4f}")
print(f"mean R(B)     : {mr_val:.4f}")
print(f"mean F1(B)    : {mf1_val:.4f}")

In [None]:
# %% Evaluate on TEST set (no plots, just numbers) + save to runs

metrics_test = best_model.val(
    data=str(DATA_ROOT / "data_subsampled.yaml"),
    split="test",    # this uses the "test" path from data_subsampled.yaml
    workers=2,
    plots=False,     # do not generate extra curves/images for test
)

box_test = metrics_test.box

map_5095_test = float(box_test.map)
map_50_test   = float(box_test.map50)
map_75_test   = float(box_test.map75)
mp_test       = float(box_test.mp)
mr_test       = float(box_test.mr)
# .f1 is per class (Ultralytics Metric docs) and is initialised to an empty
# list, which has no .mean(); fall back to 0.0 in that case like mp/mr do.
mf1_test      = float(box_test.f1.mean()) if len(box_test.f1) else 0.0

print("\n=== TEST metrics (boxes, B) ===")
print(f"AP@[.5:.95](B): {map_5095_test:.4f}")
print(f"AP@0.5(B)    : {map_50_test:.4f}")
print(f"AP@0.75(B)   : {map_75_test:.4f}")
print(f"mean P(B)     : {mp_test:.4f}")
print(f"mean R(B)     : {mr_test:.4f}")
print(f"mean F1(B)    : {mf1_test:.4f}")

# ---- Save test metrics under runs/ with a folder name indicating TEST ----
# Folder: ROOT_DIR / "runs" / "test_metrics"
test_results_dir = ROOT_DIR / "runs" / "test_metrics"
test_results_dir.mkdir(parents=True, exist_ok=True)

# Save as a readable text file (6 decimals on disk, 4 on screen).
# The file name is fixed: rename it by hand when evaluating a different test set.
txt_path = test_results_dir / "metrics_test_Low Resolution_fine tune_traditional.txt"
with txt_path.open("w", encoding="utf-8") as f:
    f.write("=== TEST metrics (boxes, B) ===\n")
    f.write(f"AP@[.5:.95](B): {map_5095_test:.6f}\n")
    f.write(f"AP@0.5(B)    : {map_50_test:.6f}\n")
    f.write(f"AP@0.75(B)   : {map_75_test:.6f}\n")
    f.write(f"mean P(B)     : {mp_test:.6f}\n")
    f.write(f"mean R(B)     : {mr_test:.6f}\n")
    f.write(f"mean F1(B)    : {mf1_test:.6f}\n")

print("\n[Test metrics saved to]")
print(f"  {txt_path}")

In [None]:
# %% Evaluate finetuned model on TEST set
# remember to change the path in data_finetune.yaml to the needed one

TEST_METRICS_DIR = ROOT_DIR / "runs" / "test_metrics"
TEST_METRICS_DIR.mkdir(parents=True, exist_ok=True)


metrics_test_ft = best_model_ft.val(
    data=str(FT_DATA_YAML),
    split="test",
    workers=2,
    plots=False,
)

# NOTE: the names below are the same ones the base-model TEST cell assigns, so
# after this cell runs they hold the finetuned model's numbers.
box_test = metrics_test_ft.box
map_5095_test = float(box_test.map)
map_50_test   = float(box_test.map50)
map_75_test   = float(box_test.map75)
mp_test       = float(box_test.mp)
mr_test       = float(box_test.mr)
# .f1 is per class (Ultralytics Metric docs) and is initialised to an empty
# list, which has no .mean(); fall back to 0.0 in that case like mp/mr do.
mf1_test      = float(box_test.f1.mean()) if len(box_test.f1) else 0.0

print("\n=== Finetune TEST metrics (boxes, B) ===")
print(f"mAP@[.5:.95](B): {map_5095_test:.4f}")
print(f"mAP@0.5(B)    : {map_50_test:.4f}")
print(f"mAP@0.75(B)   : {map_75_test:.4f}")
print(f"mean P(B)     : {mp_test:.4f}")
print(f"mean R(B)     : {mr_test:.4f}")
print(f"mean F1(B)    : {mf1_test:.4f}")

# Same numbers on disk with 6 decimals. The file name is fixed: rename it by
# hand when evaluating a different finetune dataset.
txt_path_ft = TEST_METRICS_DIR / "metrics_test_motion blur_finetune.txt"
with txt_path_ft.open("w", encoding="utf-8") as f:
    f.write("=== Finetune TEST metrics (boxes, B) ===\n")
    f.write(f"mAP@[.5:.95](B): {map_5095_test:.6f}\n")
    f.write(f"mAP@0.5(B)    : {map_50_test:.6f}\n")
    f.write(f"mAP@0.75(B)   : {map_75_test:.6f}\n")
    f.write(f"mean P(B)     : {mp_test:.6f}\n")
    f.write(f"mean R(B)     : {mr_test:.6f}\n")
    f.write(f"mean F1(B)    : {mf1_test:.6f}\n")

print("\n[Finetune test metrics saved to]")
print(" ", txt_path_ft)

# Predictions are rendered from this folder, while the metrics above use the
# "test" entry of FT_DATA_YAML -- NOTE(review): confirm both point at the same images.
test_images_dir = FT_DATA_ROOT / "images" / "test"
assert test_images_dir.exists(), f"Test images dir not found: {test_images_dir}"

pred_results_ft = best_model_ft.predict(
    source=str(test_images_dir),
    imgsz=FT_IMGSZ,
    conf=0.25,
    iou=0.5,
    project=str(ROOT_DIR / "runs" / "detect"),
    name="test_motion blur_finetune",
    save=True,
)

print("\n Finetune test predictions saved")

In [None]:
# %% Plot training/validation loss curves from results.csv
# pandas (pd) is imported in the first (imports) cell of this notebook.

# Run whose curves are plotted. This is the only path to edit: the figure
# location and file name below are derived from it.
RESULTS_CSV = DATA_ROOT / "YOLO11" / "finetune_low resolution" / "results.csv"  # change the path according to the needs
assert RESULTS_CSV.exists(), f"results.csv not found at {RESULTS_CSV}"

# Some Ultralytics versions pad the header names in results.csv with spaces,
# which would make df["train/box_loss"] raise KeyError; strip them on load.
df = pd.read_csv(RESULTS_CSV).rename(columns=str.strip)
print("Columns in results.csv:\n", df.columns, "\n")

# ---- Loss curves ----
# (column in results.csv, subplot title)
metrics_loss = [
    ("train/box_loss", "train/box_loss"),
    ("train/cls_loss", "train/cls_loss"),
    ("train/dfl_loss", "train/dfl_loss"),
    ("val/box_loss",   "val/box_loss"),
    ("val/cls_loss",   "val/cls_loss"),
    ("val/dfl_loss",   "val/dfl_loss"),
]

fig, axes = plt.subplots(2, 3, figsize=(14, 8))
axes = axes.ravel()

epochs = df.index + 1  # x-axis: one row per epoch, counted from 1

for ax, (col, title) in zip(axes, metrics_loss):
    ax.plot(epochs, df[col])
    ax.set_title(title)
    ax.set_xlabel("epoch")
    ax.set_ylabel("loss")
    ax.grid(True, linestyle="--", alpha=0.4)

plt.tight_layout()

# save + show: the figure goes next to results.csv and is named after the run folder
loss_fig_path = RESULTS_CSV.parent / f"loss curves_{RESULTS_CSV.parent.name}.png"
fig.savefig(loss_fig_path, dpi=300)
print(f"Loss curves saved to: {loss_fig_path}")

plt.show()

In [None]:
# %% Plot precision / recall / mAP vs epoch from results.csv
# Uses df and RESULTS_CSV from the loss-curves cell above.

# Header names in results.csv may carry padding spaces; plot from a stripped
# copy so the column lookups work without mutating df.
curves_df = df.rename(columns=str.strip)
epochs = curves_df.index + 1

fig, axes = plt.subplots(1, 3, figsize=(16, 4))

# One entry per panel: (title, y-axis label, [(results.csv column, legend label or None), ...])
panels = [
    ("precision(B)", "precision", [("metrics/precision(B)", None)]),
    ("recall(B)", "recall", [("metrics/recall(B)", None)]),
    (
        "AP50 / AP50-95",
        "AP",
        [("metrics/mAP50(B)", "AP50"), ("metrics/mAP50-95(B)", "AP50-95")],
    ),
]

for ax, (title, ylabel, series) in zip(axes, panels):
    for column, label in series:
        ax.plot(epochs, curves_df[column], label=label)
    ax.set_title(title)
    ax.set_xlabel("epoch")
    ax.set_ylabel(ylabel)
    ax.set_ylim(0, 1)
    if len(series) > 1:
        ax.legend()  # only the multi-line panel needs a legend
    ax.grid(True, linestyle="--", alpha=0.4)

plt.tight_layout()

# The figure goes next to results.csv and is named after the run folder, so
# only RESULTS_CSV has to change when plotting another run.
metrics_fig_path = RESULTS_CSV.parent / f"training curves_{RESULTS_CSV.parent.name}.png"
fig.savefig(metrics_fig_path, dpi=300)
print(f"Training metrics curves saved to: {metrics_fig_path}")

plt.show()