#  Wound Infection Detection


## üì¶ Part 1: Import Libraries


In [11]:
# ============================================================================
# Import all required libraries
# ============================================================================

try:
    import json
    import cv2
    import numpy as np
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader
    from pathlib import Path
    from typing import Dict, List, Tuple
    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    from tqdm import tqdm
    import random
    import yaml
    import matplotlib.pyplot as plt
    
    # PyTorch Vision
    import torchvision
    from torchvision.models.detection import maskrcnn_resnet50_fpn
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
    from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
    
    print("=" * 60)
    print("‚úì All libraries imported successfully!")
    print("=" * 60)
    print(f"PyTorch: {torch.__version__}")
    print(f"NumPy: {np.__version__}")
    print(f"CUDA: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"Device: {torch.cuda.get_device_name(0)}")
    print("=" * 60)
    
except ValueError as e:
    if "numpy.dtype size changed" in str(e):
        print("=" * 60)
        print("‚ùå ÿÆÿ∑ÿ£: ÿ™ÿπÿßÿ±ÿ∂ ÿ®ŸäŸÜ numpy Ÿà scipy")
        print("=" * 60)
        print("\nüîß ÿßŸÑÿ≠ŸÑ:")
        print("   1. ÿ¥ÿ∫ŸëŸÑ Ÿáÿ∞Ÿá ÿßŸÑÿ£ŸàÿßŸÖÿ± ŸÅŸä Terminal:")
        print("      pip install --upgrade --force-reinstall numpy scipy")
        print("   2. ÿ£Ÿà ÿ¥ÿ∫ŸëŸÑ Part 0 ŸÖÿ±ÿ© ÿ£ÿÆÿ±Ÿâ")
        print("   3. ÿ£ÿπÿØ ÿ™ÿ¥ÿ∫ŸäŸÑ Kernel: Kernel ‚Üí Restart")
        print("=" * 60)
        raise
    else:
        raise
        
except ImportError as e:
    print("=" * 60)
    print("‚ùå ÿÆÿ∑ÿ£ ŸÅŸä ÿßÿ≥ÿ™Ÿäÿ±ÿßÿØ ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™!")
    print("=" * 60)
    print(f"ÿßŸÑÿÆÿ∑ÿ£: {e}")
    print("\nüîß ÿßŸÑÿ≠ŸÑ:")
    print("   1. ÿ¥ÿ∫ŸëŸÑ Part 0 ÿ£ŸàŸÑÿßŸã (ÿ™ÿ´ÿ®Ÿäÿ™ ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™)")
    print("   2. ÿ£ÿπÿØ ÿ™ÿ¥ÿ∫ŸäŸÑ Kernel: Kernel ‚Üí Restart")
    print("=" * 60)
    raise


‚úì All libraries imported successfully!
PyTorch: 2.9.1+cpu
NumPy: 2.2.6
CUDA: False


## ‚öôÔ∏è Part 2: Configuration


In [12]:
# ============================================================================
# ÿ™ÿ´ÿ®Ÿäÿ™ ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™ ÿßŸÑŸÖÿ∑ŸÑŸàÿ®ÿ©
# ============================================================================

import sys
import subprocess

def install_package(package):
    """ÿ™ÿ´ÿ®Ÿäÿ™ ŸÖŸÉÿ™ÿ®ÿ© Ÿàÿßÿ≠ÿØÿ© ŸÖÿπ ŸÖÿπÿßŸÑÿ¨ÿ© ÿßŸÑÿ£ÿÆÿ∑ÿßÿ°"""
    try:
        print(f"  üì¶ {package}...", end=" ", flush=True)
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", package],
            capture_output=True,
            text=True,
            check=False
        )
        if result.returncode == 0:
            print("‚úì")
            return True
        else:
            print("‚ö†Ô∏è (ŸÅÿ¥ŸÑ - ŸÇÿØ ÿ™ŸÉŸàŸÜ ŸÖŸàÿ¨ŸàÿØÿ© ÿ®ÿßŸÑŸÅÿπŸÑ)")
            return False
    except Exception as e:
        print(f"‚ùå ÿÆÿ∑ÿ£: {e}")
        return False

print("=" * 60)
print("üì¶ ÿ™ÿ´ÿ®Ÿäÿ™ ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™...")
print("=" * 60)

# ÿ™ÿ´ÿ®Ÿäÿ™ setuptools Ÿà wheel ÿ£ŸàŸÑÿßŸã (ŸÖŸáŸÖ ŸÑŸÄ Python 3.13)
print("\n[1/3] ÿ™ÿ´ÿ®Ÿäÿ™ setuptools Ÿà wheel...")
install_package("setuptools")
install_package("wheel")

# ÿ™ÿ´ÿ®Ÿäÿ™ numpy Ÿà scipy (ÿ•ÿµÿØÿßÿ±ÿßÿ™ ÿ™ÿØÿπŸÖ Python 3.13)
print("\n[2/3] ÿ™ÿ´ÿ®Ÿäÿ™ numpy Ÿà scipy...")
install_package("numpy>=1.26.0")
install_package("scipy>=1.11.0")

# ÿ™ÿ´ÿ®Ÿäÿ™ ÿ®ÿßŸÇŸä ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™ (Ÿàÿßÿ≠ÿØÿ© ÿ™ŸÑŸà ÿßŸÑÿ£ÿÆÿ±Ÿâ)
print("\n[3/3] ÿ™ÿ´ÿ®Ÿäÿ™ ÿ®ÿßŸÇŸä ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™...")
packages = [
    "torch", "torchvision",
    "opencv-python", "Pillow", "albumentations",
    "pandas", "matplotlib", "seaborn",
    "tqdm", "scikit-learn", "pycocotools",
    "pyyaml", "jupyter", "ipywidgets"
]

for package in packages:
    install_package(package)

print("\n" + "=" * 60)
print("‚úì ÿ™ŸÖ ÿßŸÑÿ™ÿ´ÿ®Ÿäÿ™!")
print("=" * 60)
print("‚ö†Ô∏è ÿ£ÿπÿØ ÿ™ÿ¥ÿ∫ŸäŸÑ Kernel: Kernel ‚Üí Restart")
print("=" * 60)



# ============================================================================
# Configuration - ŸäŸÖŸÉŸÜŸÉ ÿ™ÿπÿØŸäŸÑ Ÿáÿ∞Ÿá ÿßŸÑŸÇŸäŸÖ
# ============================================================================

import platform

CONFIG = {
    # Data - ŸÖÿ≥ÿßÿ±ÿßÿ™ ŸÜÿ≥ÿ®Ÿäÿ© ŸÖŸÜ notebooks/ ÿ•ŸÑŸâ project root
    'data_root': '../data',  # ŸÖÿ¨ŸÑÿØ ÿßŸÑÿ®ŸäÿßŸÜÿßÿ™ ŸÅŸä ÿßŸÑÿ¨ÿ∞ÿ± ÿßŸÑÿ±ÿ¶Ÿäÿ≥Ÿä
    'image_size': [1024, 1024],
    'num_classes': 17,  # 16 wound types + background
    'batch_size': 2,
    # ÿπŸÑŸâ Windowsÿå ÿßÿ≥ÿ™ÿÆÿØŸÖ num_workers=0 ŸÑÿ™ÿ¨ŸÜÿ® ŸÖÿ¥ÿßŸÉŸÑ multiprocessing
    'num_workers': 0 if platform.system() == 'Windows' else 4,
    
    # Training
    'epochs': 50,
    'learning_rate': 0.001,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    
    # Splits
    'train_ratio': 0.7,
    'val_ratio': 0.15,
    'test_ratio': 0.15,
    
    # Paths - ŸÖÿ≥ÿßÿ±ÿßÿ™ ŸÜÿ≥ÿ®Ÿäÿ© ŸÖŸÜ notebooks/ ÿ•ŸÑŸâ project root
    'checkpoints_dir': '../checkpoints_medical_aug',  # ŸÖÿ¨ŸÑÿØ checkpoints ŸÅŸä ÿßŸÑÿ¨ÿ∞ÿ±
    'results_dir': '../results',  # ŸÖÿ¨ŸÑÿØ results ŸÅŸä ÿßŸÑÿ¨ÿ∞ÿ±
}

print("Configuration:")
for key, value in CONFIG.items():
    print(f"  {key}: {value}")
print(f"\n‚úì Device: {CONFIG['device']}")


üì¶ ÿ™ÿ´ÿ®Ÿäÿ™ ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™...

[1/3] ÿ™ÿ´ÿ®Ÿäÿ™ setuptools Ÿà wheel...
  üì¶ setuptools... ‚úì
  üì¶ wheel... ‚úì

[2/3] ÿ™ÿ´ÿ®Ÿäÿ™ numpy Ÿà scipy...
  üì¶ numpy>=1.26.0... ‚úì
  üì¶ scipy>=1.11.0... ‚úì

[3/3] ÿ™ÿ´ÿ®Ÿäÿ™ ÿ®ÿßŸÇŸä ÿßŸÑŸÖŸÉÿ™ÿ®ÿßÿ™...
  üì¶ torch... ‚úì
  üì¶ torchvision... ‚úì
  üì¶ opencv-python... ‚úì
  üì¶ Pillow... ‚úì
  üì¶ albumentations... ‚úì
  üì¶ pandas... ‚úì
  üì¶ matplotlib... ‚úì
  üì¶ seaborn... ‚úì
  üì¶ tqdm... ‚úì
  üì¶ scikit-learn... ‚úì
  üì¶ pycocotools... ‚úì
  üì¶ pyyaml... ‚úì
  üì¶ jupyter... ‚úì
  üì¶ ipywidgets... ‚úì

‚úì ÿ™ŸÖ ÿßŸÑÿ™ÿ´ÿ®Ÿäÿ™!
‚ö†Ô∏è ÿ£ÿπÿØ ÿ™ÿ¥ÿ∫ŸäŸÑ Kernel: Kernel ‚Üí Restart
Configuration:
  data_root: ../data
  image_size: [1024, 1024]
  num_classes: 17
  batch_size: 2
  num_workers: 0
  epochs: 50
  learning_rate: 0.001
  device: cpu
  train_ratio: 0.7
  val_ratio: 0.15
  test_ratio: 0.15
  checkpoints_dir: ../checkpoints_medical_aug
  results_dir: ../results

‚úì Device: cpu


## üìä Part 3: Data Processing


In [13]:
# ============================================================================
# 3.1 CVAT to COCO Converter
# ============================================================================

def convert_cvat_to_coco(data_root: str, output_file: str):
    """ÿ™ÿ≠ŸàŸäŸÑ ÿ™ÿπŸÑŸäŸÇÿßÿ™ CVAT ÿ•ŸÑŸâ ÿµŸäÿ∫ÿ© COCO"""
    
    # Convert to absolute path - handle relative paths correctly
    current_dir = Path.cwd()
    data_root = Path(data_root)
    
    # Handle relative paths - notebook is in notebooks/ directory
    if not data_root.is_absolute():
        # If path starts with ../, it's already relative to parent
        if str(data_root).startswith('../'):
            data_root = current_dir.parent / data_root
        # If path is relative and we're in notebooks/, go up one level
        elif current_dir.name == 'notebooks':
            data_root = current_dir.parent / data_root
        else:
            data_root = current_dir / data_root
    
    data_root = data_root.resolve()
    
    # Check if project.json exists
    project_file = data_root / "project.json"
    if not project_file.exists():
        print("=" * 60)
        print("‚ùå ÿÆÿ∑ÿ£: ŸÖŸÑŸÅ project.json ÿ∫Ÿäÿ± ŸÖŸàÿ¨ŸàÿØ!")
        print("=" * 60)
        print(f"ÿßŸÑŸÖÿ≥ÿßÿ± ÿßŸÑŸÖÿ∑ŸÑŸàÿ®: {project_file}")
        print(f"ÿßŸÑŸÖÿ≥ÿßÿ± ÿßŸÑÿ≠ÿßŸÑŸä: {current_dir}")
        print(f"data_root: {data_root}")
        print("\nüîß ÿßŸÑÿ≠ŸÑ:")
        print("   1. ÿ™ÿ£ŸÉÿØ ÿ£ŸÜ ŸÖÿ¨ŸÑÿØ 'data' ŸÖŸàÿ¨ŸàÿØ ŸÅŸä ÿßŸÑŸÖÿ¨ŸÑÿØ ÿßŸÑÿ±ÿ¶Ÿäÿ≥Ÿä ŸÑŸÑŸÖÿ¥ÿ±Ÿàÿπ")
        print("   2. ÿ™ÿ£ŸÉÿØ ÿ£ŸÜ 'data/project.json' ŸÖŸàÿ¨ŸàÿØ")
        print("   3. ÿ£Ÿà ÿπÿØŸëŸÑ CONFIG['data_root'] ŸÅŸä Part 2")
        print("=" * 60)
        raise FileNotFoundError(f"project.json not found at {project_file}")
    
    # Load project info
    with open(project_file, 'r', encoding='utf-8') as f:
        project_info = json.load(f)
    
    # Create label mapping
    label_map = {label['name']: idx for idx, label in enumerate(project_info['labels'])}
    
    # Initialize COCO structure
    coco_data = {
        'images': [],
        'annotations': [],
        'categories': [{'id': idx, 'name': name} for name, idx in label_map.items()]
    }
    
    image_id = 0
    annotation_id = 0
    
    # Process all tasks
    task_folders = sorted([f for f in data_root.iterdir() if f.is_dir() and f.name.startswith('task_')])
    
    print(f"Processing {len(task_folders)} tasks...")
    
    for task_folder in tqdm(task_folders):
        try:
            # Load annotations
            with open(task_folder / "annotations.json", 'r', encoding='utf-8') as f:
                annotations = json.load(f)
            
            # Get images
            data_dir = task_folder / "data"
            image_files = list(data_dir.glob('*.jpg')) + list(data_dir.glob('*.png'))
            
            for img_file in image_files:
                # Read image to get size
                img = cv2.imread(str(img_file))
                if img is None:
                    continue
                
                h, w = img.shape[:2]
                
                # Add image
                is_infected = '-not-' not in img_file.name.lower()
                coco_data['images'].append({
                    'id': image_id,
                    'file_name': str(img_file.relative_to(data_root)),
                    'width': w,
                    'height': h,
                    'infection_status': is_infected
                })
                
                # Add annotations for this image (first frame)
                if len(annotations) > 0 and 'shapes' in annotations[0]:
                    for shape in annotations[0]['shapes']:
                        if shape['type'] != 'polygon' or shape['label'] not in label_map:
                            continue
                        
                        # Convert polygon points - handle different formats
                        points = shape['points']
                        
                        # Check if points is a list of lists or a flat list
                        if not isinstance(points, list):
                            continue
                        
                        # Handle case where points might be a flat list of floats
                        if len(points) > 0 and isinstance(points[0], (int, float)):
                            # Flat list: [x1, y1, x2, y2, ...]
                            if len(points) % 2 != 0:
                                continue
                            points = [[points[i], points[i+1]] for i in range(0, len(points), 2)]
                        
                        # Ensure points is a list of [x, y] pairs
                        if not all(isinstance(p, (list, tuple)) and len(p) == 2 for p in points):
                            continue
                        
                        polygon = [coord for point in points for coord in point]
                        
                        # Calculate bbox
                        x_coords = [p[0] for p in points]
                        y_coords = [p[1] for p in points]
                        x_min, x_max = min(x_coords), max(x_coords)
                        y_min, y_max = min(y_coords), max(y_coords)
                        bbox = [x_min, y_min, x_max - x_min, y_max - y_min]
                        
                        # Calculate area
                        area = (x_max - x_min) * (y_max - y_min)
                        
                        coco_data['annotations'].append({
                            'id': annotation_id,
                            'image_id': image_id,
                            'category_id': label_map[shape['label']],
                            'segmentation': [polygon],
                            'area': area,
                            'bbox': bbox,
                            'iscrowd': 0
                        })
                        annotation_id += 1
                
                image_id += 1
        
        except Exception as e:
            print(f"Error processing {task_folder.name}: {e}")
    
    # Save COCO file
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(coco_data, f, indent=2)
    
    print(f"\n‚úì Done! Saved to {output_file}")
    print(f"‚úì Total images: {len(coco_data['images'])}")
    print(f"‚úì Total annotations: {len(coco_data['annotations'])}")
    
    return coco_data

print("‚úì Converter function defined!")


# ============================================================================
# 3.2 Dataset Splitter
# ============================================================================

def split_dataset(coco_file: str, output_dir: str, train_r=0.7, val_r=0.15, test_r=0.15):
    """ÿ™ŸÇÿ≥ŸäŸÖ ÿßŸÑÿ®ŸäÿßŸÜÿßÿ™ ÿ•ŸÑŸâ train/val/test"""
    
    with open(coco_file, 'r') as f:
        coco_data = json.load(f)
    
    images = coco_data['images']
    random.shuffle(images)
    
    n = len(images)
    n_train = int(n * train_r)
    n_val = int(n * val_r)
    
    splits = {
        'train': images[:n_train],
        'val': images[n_train:n_train+n_val],
        'test': images[n_train+n_val:]
    }
    
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    
    for split_name, split_images in splits.items():
        split_ids = {img['id'] for img in split_images}
        split_anns = [ann for ann in coco_data['annotations'] if ann['image_id'] in split_ids]
        
        split_data = {
            'images': split_images,
            'annotations': split_anns,
            'categories': coco_data['categories']
        }
        
        output_file = Path(output_dir) / f'{split_name}.json'
        with open(output_file, 'w') as f:
            json.dump(split_data, f, indent=2)
        
        print(f"‚úì {split_name}: {len(split_images)} images, {len(split_anns)} annotations")

print("‚úì Splitter function defined!")



‚úì Converter function defined!
‚úì Splitter function defined!


## üèãÔ∏è Part 5: Model Building & Training


In [14]:
# ============================================================================
# 5.1 Build Model
# ============================================================================

def build_model(num_classes=17):
    """ÿ®ŸÜÿßÿ° Mask R-CNN model"""
    
    # Load pretrained model
    model = maskrcnn_resnet50_fpn(pretrained=True)
    
    # Replace box predictor
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    # Replace mask predictor
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)
    
    return model

# Build model
model = build_model(num_classes=CONFIG['num_classes'])
model.to(CONFIG['device'])
print(f"‚úì Model built with {CONFIG['num_classes']} classes")
print(f"‚úì Model moved to {CONFIG['device']}")


‚úì Model built with 17 classes
‚úì Model moved to cpu


In [None]:
# ============================================================================
# 5.2 Create Datasets and DataLoaders
# ============================================================================

# Note: WoundDataset class is defined later in this cell
# The dataset creation code is moved to the end of this cell, after the class definition
# Make sure to run this entire cell from the beginning



# ============================================================================
# 5.3 Training Functions
# ============================================================================

def train_one_epoch(model, optimizer, data_loader, device):
    """ÿ™ÿØÿ±Ÿäÿ® epoch Ÿàÿßÿ≠ÿØ"""
    model.train()
    total_loss = 0
    
    pbar = tqdm(data_loader, desc="Training")
    
    for images, targets in pbar:
        # Move to device
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        # Forward
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        
        # Backward
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
        total_loss += losses.item()
        pbar.set_postfix({'loss': f'{losses.item():.4f}'})
    
    return total_loss / len(data_loader)


@torch.no_grad()
def validate(model, data_loader, device):
    """Validation"""
    model.eval()
    total_loss = 0
    
    pbar = tqdm(data_loader, desc="Validation")
    
    for images, targets in pbar:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        # Get loss
        model.train()  # Need to be in train mode to get loss
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        model.eval()
        
        total_loss += losses.item()
        pbar.set_postfix({'loss': f'{losses.item():.4f}'})
    
    return total_loss / len(data_loader)

print("‚úì Training functions defined!")


# ============================================================================
# 3.3 PyTorch Dataset
# ============================================================================

class WoundDataset(Dataset):
    """Dataset ÿ®ÿ≥Ÿäÿ∑ ŸÑŸÑÿ¨ÿ±Ÿàÿ≠"""
    
    def __init__(self, coco_file: str, data_root: str, image_size=(1024, 1024), is_train=True):
        # Find project root by looking for README.md or requirements.txt
        # Start from current directory and go up until we find project root
        current_dir = Path.cwd()
        project_root = None
        
        # Strategy 1: Try to find project root by looking for README.md or requirements.txt
        # Start from current_dir and go up
        search_dir = current_dir
        max_levels = 10  # Search up to 10 levels
        for level in range(max_levels):
            if (search_dir / 'README.md').exists() or (search_dir / 'requirements.txt').exists():
                project_root = search_dir
                break
            # Check if we've reached filesystem root
            parent = search_dir.parent
            if parent == search_dir:  # Reached filesystem root
                break
            search_dir = parent
        
        # Strategy 2: If project root not found, try to find 'notebooks' directory
        if project_root is None:
            search_dir = current_dir
            for _ in range(10):
                # If we find a 'notebooks' directory, its parent is likely the project root
                if (search_dir / 'notebooks').exists() and (search_dir / 'notebooks').is_dir():
                    project_root = search_dir
                    break
                # Also check if current directory is 'notebooks'
                if search_dir.name == 'notebooks':
                    potential_root = search_dir.parent
                    if (potential_root / 'README.md').exists() or (potential_root / 'requirements.txt').exists():
                        project_root = potential_root
                        break
                parent = search_dir.parent
                if parent == search_dir:
                    break
                search_dir = parent
        
        # Strategy 3: If still not found, try to find 'data' directory with 'splits' subdirectory
        if project_root is None:
            search_dir = current_dir
            for _ in range(10):
                if (search_dir / 'data' / 'splits').exists() and (search_dir / 'data' / 'splits').is_dir():
                    project_root = search_dir
                    break
                parent = search_dir.parent
                if parent == search_dir:
                    break
                search_dir = parent
        
        # Fallback: use current_dir
        if project_root is None:
            project_root = current_dir
        
        # Ensure project_root is absolute and resolved
        project_root = Path(project_root).resolve()
        
        # Resolve data_root path
        data_root = Path(data_root)
        if not data_root.is_absolute():
            if str(data_root).startswith('../'):
                # Remove '../' prefix and resolve from project root
                relative_path = str(data_root)[3:]  # Remove '../'
                data_root = project_root / relative_path
            else:
                data_root = project_root / data_root
        
        self.data_root = data_root.resolve()
        
        # Handle coco_file path - resolve relative to project root
        # project_root is already resolved to absolute path
        coco_file_str = str(coco_file)
        if not Path(coco_file).is_absolute():
            if coco_file_str.startswith('../'):
                # Remove '../' prefix and build path relative to project_root
                relative_path = coco_file_str[3:]  # Remove '../'
                # Build absolute path directly
                coco_file_path = project_root / relative_path
            else:
                # Path is relative but doesn't start with ../
                coco_file_path = project_root / coco_file_str
        else:
            # Path is already absolute
            coco_file_path = Path(coco_file)
        
        # Resolve to absolute path (should already be absolute, but resolve() handles any remaining .. or .)
        coco_file = str(coco_file_path.resolve())
        
        # Verify file exists before trying to open
        if not Path(coco_file).exists():
            # Additional debugging: check if the path components are correct
            debug_info = (
                f"COCO annotation file not found: {coco_file}\n"
                f"Project root: {project_root}\n"
                f"Project root type: {type(project_root)}\n"
                f"Current dir: {current_dir}\n"
                f"Original coco_file parameter: {coco_file}\n"
                f"Relative path extracted: {relative_path if 'relative_path' in locals() else 'N/A'}\n"
                f"Path before resolve: {coco_file_path}\n"
                f"Path after resolve: {coco_file}\n"
                f"Please check the path and ensure the file exists."
            )
            raise FileNotFoundError(debug_info)
        self.image_size = image_size
        self.is_train = is_train
        
        # Load COCO data
        with open(coco_file, 'r', encoding='utf-8') as f:
            self.coco = json.load(f)
        
        self.images = self.coco['images']
        
        # Create annotation index
        self.img_to_anns = {}
        for ann in self.coco['annotations']:
            img_id = ann['image_id']
            if img_id not in self.img_to_anns:
                self.img_to_anns[img_id] = []
            self.img_to_anns[img_id].append(ann)
        
        # Setup transforms
        if self.is_train:
            # Transform with bbox support (for images with annotations)
            self.transform_with_bbox = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomBrightnessContrast(p=0.3),
                A.Resize(*self.image_size),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ], bbox_params=A.BboxParams(format='coco', label_fields=['labels']))
            # Transform without bbox (for images without annotations)
            self.transform_no_bbox = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomBrightnessContrast(p=0.3),
                A.Resize(*self.image_size),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
            self.transform = None  # Will use transform_with_bbox or transform_no_bbox in __getitem__
        else:
            # For validation, no bbox_params needed
            self.transform = A.Compose([
                A.Resize(*self.image_size),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
            self.transform_with_bbox = None
            self.transform_no_bbox = None
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_info = self.images[idx]
        img_id = img_info['id']
        
        # Load image
        img_path = self.data_root / img_info['file_name']
        image = cv2.imread(str(img_path))
        
        # Check if image loaded successfully
        if image is None:
            print(f"‚ö†Ô∏è Warning: Could not load image: {img_path}")
            # Create a dummy black image as fallback
            image = np.zeros((img_info['height'], img_info['width'], 3), dtype=np.uint8)
        else:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Get annotations
        anns = self.img_to_anns.get(img_id, [])
        
        boxes = []
        labels = []
        masks = []
        
        # Get image dimensions for normalization
        img_h, img_w = image.shape[:2]
        
        for ann in anns:
            # COCO bbox format: [x, y, width, height] in pixels
            bbox = ann['bbox']
            # Normalize to [0, 1] range for albumentations
            normalized_bbox = [
                bbox[0] / img_w,  # x
                bbox[1] / img_h,  # y
                bbox[2] / img_w,  # width
                bbox[3] / img_h   # height
            ]
            boxes.append(normalized_bbox)
            labels.append(ann['category_id'])
            
            # Create mask from polygon
            mask = np.zeros((img_info['height'], img_info['width']), dtype=np.uint8)
            for seg in ann['segmentation']:
                poly = np.array(seg).reshape(-1, 2).astype(np.int32)
                cv2.fillPoly(mask, [poly], 1)
            masks.append(mask)
        
        # Apply transforms
        if self.is_train:
            if len(boxes) > 0:
                # Use transform with bbox support
                transformed = self.transform_with_bbox(
                    image=image,
                    bboxes=boxes,
                    labels=labels,
                    masks=masks
                )
                image = transformed['image']
                boxes = transformed.get('bboxes', [])
                labels = transformed.get('labels', [])
                masks = transformed.get('masks', [])
                
                # Convert normalized bboxes back to pixel coordinates
                # Get transformed image size
                if isinstance(image, torch.Tensor):
                    _, new_h, new_w = image.shape
                else:
                    new_h, new_w = image.shape[:2]
                
                # Convert from normalized [x, y, w, h] to pixel [x, y, w, h]
                boxes_pixel = []
                for bbox in boxes:
                    boxes_pixel.append([
                        bbox[0] * new_w,  # x
                        bbox[1] * new_h,  # y
                        bbox[2] * new_w,  # width
                        bbox[3] * new_h   # height
                    ])
                boxes = boxes_pixel
            else:
                # Use transform without bbox
                transformed = self.transform_no_bbox(image=image)
                image = transformed['image']
        else:
            # For validation, no bbox_params needed
            transformed = self.transform(image=image)
            image = transformed['image']
        
        # Convert to tensors
        if len(boxes) > 0:
            boxes = torch.tensor(boxes, dtype=torch.float32)
            # Convert COCO format [x, y, w, h] to x1,y1,x2,y2
            boxes_xyxy = boxes.clone()
            boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]  # x2 = x + w
            boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]  # y2 = y + h
            
            labels = torch.tensor(labels, dtype=torch.int64)
            
            # Convert masks to tensors - handle both numpy arrays and tensors
            mask_tensors = []
            for m in masks:
                if isinstance(m, torch.Tensor):
                    mask_tensors.append(m)
                elif isinstance(m, np.ndarray):
                    mask_tensors.append(torch.from_numpy(m))
                else:
                    # Fallback: convert to numpy first
                    mask_tensors.append(torch.from_numpy(np.array(m)))
            masks = torch.stack(mask_tensors)
        else:
            boxes_xyxy = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
            masks = torch.zeros((0, *self.image_size), dtype=torch.uint8)
        
        target = {
            'boxes': boxes_xyxy,
            'labels': labels,
            'masks': masks,
            'image_id': torch.tensor([img_id])
        }
        
        return image, target


def collate_fn(batch):
    """Custom collate for DataLoader"""
    return tuple(zip(*batch))

print("‚úì Dataset class defined!")

# ============================================================================
# 5.2 Create Datasets and DataLoaders (moved here after class definition)
# ============================================================================

# Create datasets
train_dataset = WoundDataset(
    '../data/splits/train.json',
    CONFIG['data_root'],
    tuple(CONFIG['image_size']),
    is_train=True
)

val_dataset = WoundDataset(
    '../data/splits/val.json',
    CONFIG['data_root'],
    tuple(CONFIG['image_size']),
    is_train=False
)

# Create dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG['batch_size'],
    shuffle=True,
    num_workers=CONFIG['num_workers'],
    collate_fn=collate_fn
)

val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG['batch_size'],
    shuffle=False,
    num_workers=CONFIG['num_workers'],
    collate_fn=collate_fn
)

print("‚úì Datasets and DataLoaders created!")
print(f"‚úì Train samples: {len(train_dataset)}")
print(f"‚úì Val samples: {len(val_dataset)}")

# ============================================================================
# 5.4 Setup Optimizer and Scheduler
# ============================================================================

# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=CONFIG['learning_rate'], momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

print("‚úì Optimizer and scheduler setup complete!")

‚úì Training functions defined!
‚úì Dataset class defined!


FileNotFoundError: COCO annotation file not found: E:\GitHub\data\splits\train.json
Project root: E:\GitHub\Wound-infection-detection-model
Current dir: e:\GitHub\Wound-infection-detection-model\notebooks
Original path: E:\GitHub\Wound-infection-detection-model\..\data\splits\train.json
Please check the path and ensure the file exists.

## üéØ Part 6: Start Training



In [None]:
# ============================================================================
# Training Loop
# ============================================================================

print("=" * 60)
print("Starting Training")
print("=" * 60)

best_val_loss = float('inf')
checkpoints_dir = Path(CONFIG['checkpoints_dir'])
checkpoints_dir.mkdir(exist_ok=True)

for epoch in range(1, CONFIG['epochs'] + 1):
    print(f"\nEpoch {epoch}/{CONFIG['epochs']}")
    print("-" * 40)
    
    # Train
    train_loss = train_one_epoch(model, optimizer, train_loader, CONFIG['device'])
    
    # Validate
    val_loss = validate(model, val_loader, CONFIG['device'])
    
    # Step scheduler
    lr_scheduler.step()
    
    # Print stats
    print(f"\nTrain Loss: {train_loss:.4f}")
    print(f"Val Loss: {val_loss:.4f}")
    print(f"LR: {optimizer.param_groups[0]['lr']:.6f}")
    
    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
        }, checkpoints_dir / 'best_model.pth')
        print(f"‚úì Saved best model (val_loss: {val_loss:.4f})")
    
    # Save checkpoint every 5 epochs
    if epoch % 5 == 0:
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, checkpoints_dir / f'checkpoint_epoch_{epoch}.pth')

print("\n" + "=" * 60)
print("Training completed!")
print(f"Best validation loss: {best_val_loss:.4f}")
print("=" * 60)


## üîç Part 7: Prediction Functions


In [None]:
# ============================================================================
# 7.1 Helper Functions for Prediction
# ============================================================================

def calculate_wound_area(predictions, marker_class_id=8, marker_size_cm=3.0):
    """ÿ≠ÿ≥ÿßÿ® ŸÖÿ≥ÿßÿ≠ÿ© ÿßŸÑÿ¨ÿ±ÿ≠ ÿ®ÿßÿ≥ÿ™ÿÆÿØÿßŸÖ ÿπŸÑÿßŸÖÿ© ÿßŸÑŸÇŸäÿßÿ≥ 3√ó3 ÿ≥ŸÖ"""
    
    labels = predictions['labels'].cpu().numpy()
    masks = predictions['masks'].cpu().numpy()
    
    # Find marker
    marker_idx = np.where(labels == marker_class_id)[0]
    
    if len(marker_idx) == 0:
        return None, None
    
    # Get marker mask
    marker_mask = masks[marker_idx[0]][0] > 0.5
    marker_area_pixels = marker_mask.sum()
    
    if marker_area_pixels == 0:
        return None, None
    
    # Calculate pixel to cm ratio
    pixel_to_cm = marker_size_cm / np.sqrt(marker_area_pixels)
    
    # Find wound (class 0)
    wound_idx = np.where(labels == 0)[0]
    
    if len(wound_idx) == 0:
        return None, pixel_to_cm
    
    # Get wound mask
    wound_mask = masks[wound_idx[0]][0] > 0.5
    wound_area_pixels = wound_mask.sum()
    
    # Convert to cm¬≤
    wound_area_cm2 = wound_area_pixels * (pixel_to_cm ** 2)
    
    return wound_area_cm2, pixel_to_cm


def detect_infection(predictions, infection_classes=[4, 5, 6, 15]):
    """ŸÉÿ¥ŸÅ ÿßŸÑÿπÿØŸàŸâ ŸÖŸÜ ÿßŸÑŸÅÿ¶ÿßÿ™ ÿßŸÑŸÖŸàÿ¨ŸàÿØÿ©"""
    
    labels = predictions['labels'].cpu().numpy()
    scores = predictions['scores'].cpu().numpy()
    
    # Check for infection indicators
    infection_detections = []
    
    for label, score in zip(labels, scores):
        if label in infection_classes:
            infection_detections.append(float(score))
    
    if len(infection_detections) > 0:
        return True, np.mean(infection_detections)
    
    return False, 0.0

print("‚úì Helper functions defined!")


In [None]:
# ============================================================================
# 7.2 Prediction Function
# ============================================================================

@torch.no_grad()
def predict_image(image_path: str, model, device, conf_threshold=0.5):
    """ÿßŸÑÿ™ŸÜÿ®ÿ§ ÿπŸÑŸâ ÿµŸàÿ±ÿ© Ÿàÿßÿ≠ÿØÿ©"""
    
    model.eval()
    
    # Convert to absolute path if needed
    img_path = Path(image_path)
    if not img_path.is_absolute():
        current_dir = Path.cwd()
        if current_dir.name == 'notebooks':
            img_path = current_dir.parent / img_path
        else:
            img_path = current_dir / img_path
    
    # Load image
    image = cv2.imread(str(img_path))
    
    # Check if image loaded successfully
    if image is None:
        raise FileNotFoundError(f"Could not load image from: {img_path}\nPlease check the path and make sure the image exists.")
    
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Resize
    image_resized = cv2.resize(image_rgb, tuple(CONFIG['image_size']))
    
    # To tensor
    image_tensor = torch.from_numpy(image_resized).permute(2, 0, 1).float() / 255.0
    
    # Normalize
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    image_tensor = (image_tensor - mean) / std
    
    # Predict
    predictions = model([image_tensor.to(device)])[0]
    
    # Filter by confidence
    keep = predictions['scores'] >= conf_threshold
    filtered = {
        'boxes': predictions['boxes'][keep],
        'labels': predictions['labels'][keep],
        'scores': predictions['scores'][keep],
        'masks': predictions['masks'][keep]
    }
    
    # Calculate wound area
    wound_area, _ = calculate_wound_area(filtered)
    
    # Detect infection
    has_infection, infection_conf = detect_infection(filtered)
    
    # Build result
    result = {
        'image_path': image_path,
        'num_detections': len(filtered['labels']),
        'wound_area_cm2': float(wound_area) if wound_area else None,
        'has_infection': has_infection,
        'infection_confidence': float(infection_conf),
        'findings': {
            'edema': 4 in filtered['labels'].cpu().numpy(),
            'hyperemia': 5 in filtered['labels'].cpu().numpy(),
            'necrosis': 6 in filtered['labels'].cpu().numpy(),
            'granulation': 7 in filtered['labels'].cpu().numpy(),
            'fibrin': 1 in filtered['labels'].cpu().numpy(),
        }
    }
    
    return result, filtered

print("‚úì Prediction function defined!")


In [None]:
# ============================================================================
# 7.3 Visualization Function
# ============================================================================

def visualize_prediction(image_path: str, predictions):
    """ÿ±ÿ≥ŸÖ ÿßŸÑŸÜÿ™ÿßÿ¶ÿ¨ ÿπŸÑŸâ ÿßŸÑÿµŸàÿ±ÿ©"""
    
    # Convert to absolute path if needed
    img_path = Path(image_path)
    if not img_path.is_absolute():
        current_dir = Path.cwd()
        if current_dir.name == 'notebooks':
            img_path = current_dir.parent / img_path
        else:
            img_path = current_dir / img_path
    
    image = cv2.imread(str(img_path))
    
    # Check if image loaded successfully
    if image is None:
        print(f"‚ö†Ô∏è Warning: Could not load image from: {img_path}")
        return
    
    image = cv2.resize(image, tuple(CONFIG['image_size']))
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    masks = predictions['masks'].cpu().numpy()
    labels = predictions['labels'].cpu().numpy()
    scores = predictions['scores'].cpu().numpy()
    
    # Draw masks
    for mask, label, score in zip(masks, labels, scores):
        mask = (mask[0] > 0.5).astype(np.uint8)
        
        # Random color
        color = tuple(np.random.randint(0, 255, 3).tolist())
        
        # Apply mask
        colored_mask = np.zeros_like(image_rgb)
        colored_mask[mask > 0] = color
        image_rgb = cv2.addWeighted(image_rgb, 0.7, colored_mask, 0.3, 0)
    
    # Display
    plt.figure(figsize=(12, 8))
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.title(f"Predictions: {len(labels)} detections")
    plt.show()

print("‚úì Visualization function defined!")


## üéØ Part 8: Run Prediction

**ÿπÿØŸëŸÑ ŸÖÿ≥ÿßÿ± ÿßŸÑÿµŸàÿ±ÿ© ŸÅŸä ÿßŸÑÿÆŸÑŸäÿ© ÿßŸÑÿ™ÿßŸÑŸäÿ© ÿ´ŸÖ ÿ¥ÿ∫ŸëŸÑŸáÿß**


In [None]:
# ============================================================================
# Load Model and Predict
# ============================================================================

# Load best model
model_path = Path(CONFIG['checkpoints_dir']) / 'best_model.pth'

if model_path.exists():
    print(f"Loading model from: {model_path}")
    checkpoint = torch.load(model_path, map_location=CONFIG['device'])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print("‚úì Model loaded successfully!")
else:
    print("‚ö†Ô∏è Model not found! Please train the model first (Part 6)")

# ÿ™ÿ∫ŸäŸäÿ± ÿßŸÑŸÖÿ≥ÿßÿ± ÿ•ŸÑŸâ ÿµŸàÿ±ÿ© ÿ™ÿ±ŸäÿØ ÿßŸÑÿ™ŸÜÿ®ÿ§ ÿπŸÑŸäŸáÿß
image_path = 'data/task_0/data/2.jpg'  # ‚¨ÖÔ∏è ÿπÿØŸëŸÑ Ÿáÿ∞ÿß ÿßŸÑŸÖÿ≥ÿßÿ±

print("\n" + "=" * 60)
print("Wound Detection - Prediction")
print("=" * 60)
print(f"\nImage: {image_path}")

# Predict
print("\nPredicting...")
result, predictions = predict_image(image_path, model, CONFIG['device'])

# Print result
print("\nResults:")
print("-" * 40)
print(f"Detections: {result['num_detections']}")
print(f"Wound Area: {result['wound_area_cm2']} cm¬≤" if result['wound_area_cm2'] else "Wound Area: N/A")
print(f"Infection: {'YES' if result['has_infection'] else 'NO'} (confidence: {result['infection_confidence']:.2f})")
print("\nFindings:")
for finding, present in result['findings'].items():
    print(f"  {finding}: {'‚úì' if present else '‚úó'}")

# Visualize
print("\nVisualizing...")
visualize_prediction(image_path, predictions)

# Save result
output_dir = Path(CONFIG['results_dir'])
output_dir.mkdir(exist_ok=True)

result_file = output_dir / f"{Path(image_path).stem}_result.json"
with open(result_file, 'w') as f:
    json.dump(result, f, indent=2)
print(f"\n‚úì Result saved: {result_file}")
